ES练习册

es索引结构和数据实例

这里提供索引结构和数据实例提供给大家使用练习，希望大家能够一起成长进步~~~~

java 复制代码

#添加索引
PUT /ecommerce_products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        "custom_lowercase": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["lowercase"]
        }
      }
    }
  },
  "mappings": {
    "dynamic": "strict",
    "properties": {
      "product_id": { "type": "keyword" }, 
      "name": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword", "ignore_above": 256 }
        },
        "analyzer": "custom_lowercase"
      },
      "description": { "type": "text" },
      "price": { "type": "scaled_float", "scaling_factor": 100 },
      "in_stock": { "type": "boolean" },
      "categories": { "type": "keyword" },
      "tags": {
        "type": "keyword",
        "eager_global_ordinals": true
      },
      "created_at": { 
        "type": "date",
        "format": "strict_date_optional_time||epoch_millis"
      },
      "last_updated": { "type": "date" },
      "rating": { "type": "half_float" },
      "reviews": {
        "type": "nested",
        "properties": {
          "user_id": { "type": "keyword" },
          "score": { "type": "byte" },
          "comment": { "type": "text" },
          "created_at": { "type": "date" },
          "responses": {
            "type": "nested",
            "properties": {
              "admin_id": { "type": "keyword" },
              "response_text": { "type": "text" }
            }
          }
        }
      },
      "warehouse_location": { "type": "geo_point" },
      "supplier_info": {
        "properties": {
          "name": { "type": "keyword" },
          "ip_address": { "type": "ip" }
        }
      },
      "suggest": { "type": "completion" },
      "attributes": {
        "type": "object",
        "dynamic": true
      },
      "name_translations": {
        "type": "object",
        "properties": {
          "en": { "type": "text" },
          "zh": { "type": "text", "analyzer": "ik_max_word" },
          "es": { "type": "text" }
        }
      }
    }
  }
}

#查询索引
GET /ecommerce_products


#添加文档数据
#POST/索引库名/_doc/文档id

POST /ecommerce_products/_doc/1
{
  "product_id": "P123456",
  "name": "无线蓝牙耳机",
  "description": "高端降噪耳机，30小时超长续航",
  "price": 199.99,
  "in_stock": true,
  "categories": ["电子产品", "音频设备"],
  "tags": ["无线", "降噪", "新款"],
  "created_at": "2023-10-05T08:23:45Z",
  "last_updated": "2024-02-15",
  "rating": 4.5,
  "reviews": [
    {
      "user_id": "U1001",
      "score": 5,
      "comment": "音质非常出色",
      "created_at": "2023-12-01",
      "responses": [
        {
          "admin_id": "ADM001",
          "response_text": "感谢您的反馈！"
        }
      ]
    }
  ],
  "warehouse_location": {
    "lat": 40.7128,
    "lon": -74.0060
  },
  "supplier_info": {
    "name": "音科技术公司",
    "ip_address": "192.168.1.100"
  },
  "suggest": {
    "input": ["耳机", "蓝牙", "无线"],
    "weight": 34
  },
  "attributes": {
    "颜色": "黑色",
    "重量_kg": 0.25
  },
  "name_translations": {
    "en": "Wireless Headphones",
    "zh": "无线蓝牙耳机",
    "es": "Audífonos inalámbricos Bluetooth"
  }
}

POST /ecommerce_products/_doc/2
{
  "product_id": "P789012",
  "name": "智能健身手环",
  "description": "防水运动手环，支持心率监测和睡眠追踪",
  "price": 79.99,
  "in_stock": false,
  "categories": ["可穿戴设备", "运动健康"],
  "tags": ["防水", "心率监测", "入门款"],
  "created_at": "2023-11-20",
  "last_updated": "2024-03-10T14:30:00",
  "rating": 4.2,
  "reviews": [
    {
      "user_id": "U2002",
      "score": 4,
      "comment": "性价比不错，但APP需要改进",
      "created_at": "2024-01-15",
      "responses": [
        {
          "admin_id": "ADM002",
          "response_text": "我们会持续优化软件体验"
        }
      ]
    }
  ],
  "warehouse_location": "31.2304,121.4737",
  "supplier_info": {
    "name": "智动科技",
    "ip_address": "10.20.30.40"
  },
  "suggest": {
    "input": ["手环", "运动", "健康"],
    "weight": 28
  },
  "attributes": {
    "腕带材质": "硅胶",
    "屏幕类型": "OLED"
  }
}

POST /ecommerce_products/_doc/3
{
  "product_id": "P345678",
  "name": "智能温控水杯",
  "description": "支持APP控制的保温水杯，12小时长效保温",
  "price": 149.5,
  "in_stock": true,
  "categories": ["生活家电", "智能硬件"],
  "tags": ["保温", "智能控制", "礼品"],
  "created_at": "2024-02-01T09:15:30Z",
  "rating": 4.8,
  "reviews": [
    {
      "user_id": "U3003",
      "score": 5,
      "comment": "冬天保持水温效果非常好",
      "created_at": "2024-03-01",
      "responses": []
    }
  ],
  "warehouse_location": "22.3193,114.1694",
  "supplier_info": {
    "name": "智联家居",
    "ip_address": "172.16.0.100"
  },
  "suggest": {
    "input": ["水杯", "保温杯", "智能"],
    "weight": 42
  },
  "attributes": {
    "容量_ml": 500,
    "材质": "不锈钢"
  },
  "name_translations": {
    "en": "Smart Thermos Cup",
    "zh": "智能温控水杯",
    "es": "Taza termo inteligente"
  }
}


POST /ecommerce_products/_doc/4
{
  "product_id": "P456789",
  "name": "健康监测智能手表",
  "description": "1.5英寸AMOLED屏，支持血氧心率监测，50米防水",
  "price": 899.0,
  "in_stock": true,
  "categories": ["可穿戴设备", "健康监测"],
  "tags": ["防水", "长续航", "新品"],
  "created_at": "2024-03-20T10:00:00Z",
  "rating": 4.7,
  "reviews": [
    {
      "user_id": "U4004",
      "score": 5,
      "comment": "游泳监测非常准确",
      "created_at": "2024-04-05",
      "responses": [
        {
          "admin_id": "ADM003",
          "response_text": "感谢选择我们的产品！"
        }
      ]
    },
     {
      "user_id": "U3003",
      "score": 5,
      "comment": "喜欢",
      "created_at": "2024-04-05",
      "responses": [
        {
          "admin_id": "ADM003",
          "response_text": "感谢选择我们的产品！"
        }
      ]
    }
  ],
  "warehouse_location": "22.2833,114.1667",  
  "supplier_info": {
    "name": "健康科技集团",
    "ip_address": "10.88.10.25"
  },
  "attributes": {
    "颜色": "曜石黑",
    "表带材质": "氟橡胶",
    "电池容量_mAh": 450
  },
  "name_translations": {
    "en": "Health Monitoring Smart Watch",
    "zh": "健康监测智能手表",
    "es": "Taza termo inteligente"
  }
}

POST /ecommerce_products/_doc/5
{
  "product_id": "P567890",
  "name": "静音空气净化器",
  "description": "CADR 600m³/h，智能感应PM2.5，适用80㎡空间",
  "price": 2499.0,
  "in_stock": true,
  "categories": ["生活家电", "环境电器"],
  "tags": ["智能感应", "静音"],
  "created_at": "2023-12-15",
  "rating": 4.3,
  "reviews": [
    {
      "user_id": "U3003",
      "score": 4,
      "comment": "睡眠模式确实安静",
      "created_at": "2024-01-10",
      "responses": []
    }
  ],
  "warehouse_location": { 
    "lat": 30.5728,
    "lon": 104.0668
  },  
  "supplier_info": {
    "name": "清洁科技公司",
    "ip_address": "172.20.10.5"
  },
  "attributes": {
    "滤芯类型": "复合滤网",
    "噪音分贝": 28,
    "适用面积_㎡": 80
  },
  "name_translations": {
  "en": "Health Monitoring Smart Watch",
    "zh": "空气净化器",
    "es": "Taza termo inteligente"
  }
}

POST /ecommerce_products/_doc/6
{
  "product_id": "P678901",
  "name": "全掌气垫跑步鞋",
  "description": "轻量透气网面，ZOOM AIR缓震技术",
  "price": 699.0,
  "in_stock": false,
  "categories": ["运动服饰", "鞋类"],
  "tags": ["促销", "限量款"],
  "created_at": "2024-02-28T14:30:00Z",
  "rating": 4.6,
  "reviews": [
    {
      "user_id": "U6006",
      "score": 2,
      "comment": "尺码偏小建议买大一码",
      "created_at": "2024-03-15",
      "responses": [
        {
          "admin_id": "ADM002",
          "response_text": "已反馈给质检部门改进"
        }
      ]
    }
  ],
  "warehouse_location": "23.1291,113.2644",  
  "supplier_info": {
    "name": "运动装备制造",
    "ip_address": "192.168.2.200"
  },
  "attributes": {
    "颜色": "荧光绿/黑",
    "尺码": "42",
    "重量_g": 320
  },
  "name_translations": {
  "en": "Health Monitoring Smart Watch",
    "zh": "全掌气垫跑步鞋",
    "es": "Taza termo inteligente"
  }
}

POST /ecommerce_products/_doc/8
{
  "product_id": "P890123",
  "name": "4K全景云台摄像头",
  "description": "360°全景追踪，红外夜视，支持AI人形检测",
  "price": 399.0,
  "in_stock": true,
  "categories": ["智能硬件", "安防"],
  "tags": ["夜视", "AI识别"],
  "created_at": "2024-01-10T08:00:00Z",
  "rating": 4.0,
  "reviews": [
    {
      "user_id": "U8008",
      "score": 2,
      "comment": "夜间画质有待提升",
      "created_at": "2024-02-01",
      "responses": []
    }
  ],
  "warehouse_location": "39.9042,116.4074",  
  "supplier_info": {
    "name": "智能安防科技",
    "ip_address": "192.168.5.100"
  },
  "attributes": {
    "原价": "599",
    "存储方式": ["云存储", "本地SD卡"],
    "夜视距离_米": 15
  },
  "name_translations": {
  "en": "Health Monitoring Smart Watch",
    "zh": "4K全景云台摄像头",
    "es": "Taza termo inteligente"
  }
}

#查询文档
#GET/索引库名/_doc/文档id
GET /ecommerce_products/_doc/1


#增量修改文档
#POST/索引库名/_update/文档id
POST /ecommerce_products/_update/8
{
  "doc": {
   "tags": ["夜视", "AI识别","促销"]
  }
}

函数类型	作用	示例
`weight`	固定权重值	`{ "weight": 2.0, "filter": { "term": { ... } } }`
`field_value_factor`	基于字段值的评分计算	`{ "field": "rating", "factor": 1.5 }`
`random_score`	随机评分（需指定种子）	`{ "random_score": { "seed": 用户ID } }`
`script_score`	自定义脚本计算评分	`{ "script": "doc['price'].value * 0.1" }`
`decay`	按距离/时间衰减评分（如高斯衰减）	见下文详细示例

模式	说明
`sum`	所有函数评分相加（默认）
`avg`	取平均值
`max`	取最大值
`min`	取最小值
`multiply`	所有函数评分相乘

模式	公式
`sum`	最终分 = 原始分 + 函数评分
`replace`	最终分 = 函数评分
`multiply`	最终分 = 原始分 × 函数评分
`avg`	最终分 = (原始分 + 函数评分)/2

Aggs聚合

大白话的理解就是：

你让他aggs一下type，他就把不同类型type以及数量返回给你

一、聚合的核心概念

聚合（Aggregations） 是 Elasticsearch 中用于对数据进行统计分析的功能，类似于 SQL 的 GROUP BY + 统计函数（如 COUNT、AVG）。
核心用途：

数据分组统计（如按分类统计商品数量）
计算指标（如平均价格、最高评分）
多维度交叉分析（如时间+地域的销售分布）

二、基础语法

java 复制代码

{
  "aggs": {
    "agg_name": {
      "agg_type": {
        "field": "field_name",
        "size": 10 (可选参数，具体取决于聚合类型)
      }
    }
  }
}

agg_name: 自定义的聚合名称，用于标识聚合结果。
agg_type : 聚合的类型，例如 terms、avg、sum 等。
field_name: 指定要进行聚合操作的字段。

三、常见聚合类型

Terms Aggregation （按值分组）

用于根据字段的唯一值对文档进行分组。
适用于分析离散值，如标签、类别等。

java 复制代码

{
  "aggs": {
    "group_by_status": {
      "terms": {
        "field": "status.keyword"
      }
    }
  }
}

Range Aggregation （范围分组）

用于根据数值区间对文档进行分组。

java 复制代码

{
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          { "to": 100 },
          { "from": 100, "to": 200 },
          { "from": 200 }
        ]
      }
    }
  }
}

Date Histogram Aggregation （日期直方图）

按时间间隔对文档进行分组。

java 复制代码

{
  "aggs": {
    "sales_over_time": {
      "date_histogram": {
        "field": "sale_date",
        "calendar_interval": "month"
      }
    }
  }
}

Avg, Sum, Min, Max Aggregations （统计计算）

用于计算字段的平均值、总和、最小值、最大值。

java 复制代码

{
  "aggs": {
    "average_price": {
      "avg": {
        "field": "price"
      }
    }
  }
}

Nested Aggregation （嵌套聚合）

在嵌套字段上进行聚合。

java 复制代码

{
  "aggs": {
    "nested_comments": {
      "nested": {
        "path": "comments"
      },
      "aggs": {
        "comment_authors": {
          "terms": {
            "field": "comments.author"
          }
        }
      }
    }
  }
}

组合聚合

可以将多个聚合嵌套在一起，以得到更复杂的查询。例如，首先对一个字段进行分组，然后对每个分组进行统计计算。

java 复制代码

{
  "aggs": {
    "group_by_status": {
      "terms": {
        "field": "status.keyword"
      },
      "aggs": {
        "average_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

四、实战练习

java 复制代码

#需求：
#统计每个商品的评论回复率（有回复的评论数 / 总评论数），按回复率降序排列
GET /ecommerce_products/_search
{
  "size": 0,
  "aggs": {
    "products": {
      "nested": { "path": "reviews" },
      "aggs": {
        "total_comments": { "value_count": { "field": "reviews.user_id" } },
        "replied_comments": {
          "nested": { "path": "reviews.responses" },
          "aggs": { "response_count": { "value_count": { "field": "reviews.responses.admin_id" } } }
        }
      }
    }
  }
}

五、字段详解

`bucket_script`

作用：对于已有的聚合结果进行二次计算，应用于计算比例、差值等

基础语法：

java 复制代码

{
  "aggs": {
    "自定义聚合名称": {
      "bucket_script": {
        "buckets_path": {
          "变量1": "聚合路径1",
          "变量2": "聚合路径2"
        },
        "script": "params.变量1 + params.变量2"
      }
    }
  }
}

脚本

doc['field']的通用规则

适用字段类型

适用字段类型 ：仅适用于非文本字段（如 integer、long、double、date、keyword 等）。
不支持文本字段 ：如果字段是 text 类型且未设置 fielddata=true，直接访问 doc['text_field'] 会报错。
返回值类型 ：返回一个 FieldValues 对象，需通过特定方法获取值。

常用方法

(1) `.value`

作用：获取字段的第一个值（单值字段直接取值，多值字段取第一个值）。
示例：

bash 复制代码

// 单值字段：直接返回数值
double score = doc['score'].value;

// 多值字段：返回第一个值（如 [80, 90] 返回 80）
int firstScore = doc['scores'].value;

(2) `.values`

作用：获取字段的所有值（返回数组形式）。
示例：

java 复制代码

// 多值字段：返回数组 [80, 90]
double[] scores = doc['scores'].values;

// 遍历多值字段
for (double s : doc['scores'].values) {
  total += s;
}

(3) `.size()`

作用：获取字段值的数量（单值字段返回 1，多值字段返回实际数量）。
示例：

java 复制代码

if (doc['score'].size() > 0) {  // 判断字段是否存在且非空
  return doc['score'].value;
} else {
  return 0;
}

(4) `.empty`

作用：检查字段是否为空（无值）。
示例：

java 复制代码

if (!doc['score'].empty) {
  return doc['score'].value;
}

(5) `.get(int index)`

作用：获取数组字段的指定索引值。
示例：

bash 复制代码

// 获取多值字段的第二个值
if (doc['scores'].size() >= 2) {
  double secondScore = doc['scores'].get(1);
}

(6) `.count()`

作用：统计字段中非空值的数量（与 .size() 不同，可能过滤空值）。
示例：

java 复制代码

int validScores = doc['scores'].count();

(7) 聚合方法

适用场景：对多值字段进行聚合计算。
常用方法：

java 复制代码

.min()  // 最小值
.max()  // 最大值
.sum()  // 总和
.average()  // 平均值

方法	用途	示例场景
`.value`	获取单值字段值或多值字段的第一个值	单值数值计算
`.values`	获取多值字段的所有值	遍历求和、求平均
`.size()`	获取值的数量（包括空值）	判断字段是否为空
`.empty`	检查字段是否为空	空值保护逻辑
`.get(index)`	获取多值字段的指定索引值	访问特定位置的元素
`.min()/.max()`	多值字段的最小值/最大值	统计范围筛选

补充

size()、.count() 和 .empty 方法

以下是针对 Elasticsearch Painless 脚本中 doc['field'] 的 .size()、.count() 和 .empty 方法在处理 空值、数组、字段不存在 时的具体行为示例。

示例数据

假设 Elasticsearch 中有如下文档：

java 复制代码

{
  "id": 1,
  "name": "Alice",
  "scores": [85, 90, null],   // 多值字段，包含 null
  "empty_array": [],          // 空数组
  "nullable_field": null      // 单值字段，值为 null
}

注意：字段 missing_field 不存在。

Painless 脚本示例

java 复制代码

// 检查 scores 字段
def scores_size = doc['scores'].size();      // 返回 3
def scores_count = doc['scores'].count();    // 返回 2（过滤 null）
def scores_empty = doc['scores'].empty;      // 返回 false

// 检查 empty_array 字段
def empty_array_size = doc['empty_array'].size();  // 返回 0
def empty_array_count = doc['empty_array'].count();// 返回 0
def empty_array_empty = doc['empty_array'].empty;  // 返回 true

// 检查 nullable_field 字段
def nullable_size = doc['nullable_field'].size();  // 返回 1（单值 null）
def nullable_count = doc['nullable_field'].count();// 返回 0（过滤 null）
def nullable_empty = doc['nullable_field'].empty;  // 返回 false（字段存在）

// 检查 missing_field 字段
def missing_size = doc['missing_field'].size();    // 抛出异常（字段不存在）
def missing_empty = doc['missing_field'].empty;    // 抛出异常（字段不存在）

字段类型	`.size()`	`.count()`	`.empty`
多值带 null (如 `scores: [85, 90, null]`)	返回总数量（含 null）示例：`3`	返回非 null 值数量示例：`2`	`false`（存在且非空数组）
空数组 (如 `empty_array: []`)	`0`	`0`	`true`（存在但无值）
单值 null (如 `nullable_field: null`)	`1`（单值字段视为数组 [null]）	`0`（过滤 null）	`false`（字段存在）
字段不存在 (如 `missing_field`)	报错	报错	报错

2.判空处理

java 复制代码

// 正确做法：先检查字段是否存在
if (doc.containsKey('score') && !doc['score'].empty) {
    return doc['score'].value;
} else {
    return 0;
}

Scroll滚动查询

复制代码

POST /your_index/_search?scroll=5m
{
  "size": 100,
  "query": { "match_all": {} }
}

es索引结构和数据实例

相关性算分专线练习

一、`function_score` 的核心作用

二、基础语法结构

三、核心参数详解

1. `query` (主查询)

2. `functions` (评分函数)

3. `score_mode` (函数评分组合方式)

4. `boost_mode` (最终评分计算方式)

四、实战练习

Aggs聚合

一、聚合的核心概念

二、基础语法

三、常见聚合类型

四、实战练习

五、字段详解

`bucket_script`

脚本

doc['field']的通用规则

适用字段类型

常用方法

(1) `.value`

(2) `.values`

(3) `.size()`

(4) `.empty`

(5) `.get(int index)`

(6) `.count()`

(7) 聚合方法

补充

Scroll滚动查询

ES练习册

es索引结构和数据实例

相关性算分专线练习

一、function_score 的核心作用

二、基础语法结构

三、核心参数详解

1. query (主查询)

2. functions (评分函数)

3. score_mode (函数评分组合方式)

4. boost_mode (最终评分计算方式)

四、实战练习

Aggs聚合

一、聚合的核心概念

二、基础语法

三、常见聚合类型

四、实战练习

五、字段详解

bucket_script

脚本

doc['field']的通用规则

适用字段类型

常用方法

(1) .value

(2) .values

(3) .size()

(4) .empty

(5) .get(int index)

(6) .count()

(7) 聚合方法

补充

Scroll滚动查询

一、`function_score` 的核心作用

1. `query` (主查询)

2. `functions` (评分函数)

3. `score_mode` (函数评分组合方式)

4. `boost_mode` (最终评分计算方式)

`bucket_script`

(1) `.value`

(2) `.values`

(3) `.size()`

(4) `.empty`

(5) `.get(int index)`

(6) `.count()`