创建索引my_index:
# Create the index "my_index": 1 primary shard, 0 replicas, 30s refresh
# interval; "age" is mapped as integer and "name" as text.
curl --request PUT "http://localhost:9200/my_index" \
  --header "Content-Type: application/json" \
  --data '
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"refresh_interval": "30s"
},
"mappings": {
"properties": {
"age": {
"type": "integer"
},
"name": {
"type": "text"
}
}
}
}'
bulk.json中存储数据:
{"index": {"_index": "my_index", "_id": "1"}}
{"name": "张三", "age": 25}
{"index": {"_index": "my_index", "_id": "2"}}
{"name": "李四", "age": 30}
{"index": {"_index": "my_index", "_id": "3"}}
{"name": "王五", "age": 35}
通过 Bulk API 批量插入(bulk.json 的最后一行也必须以换行符结尾):
# POST the contents of bulk.json to the Bulk API. --data-binary sends the file
# as-is, keeping the newlines that separate the NDJSON lines.
curl --request POST "http://localhost:9200/_bulk" \
  --header "Content-Type: application/json" \
  --data-binary "@bulk.json"
查看文档数(索引的 refresh_interval 为 30s,刚写入的文档可能要等下一次刷新后才会计入):
# Query the document count of my_index.
curl --request GET "http://localhost:9200/my_index/_count"
Shell脚本:
bash
#!/bin/bash
#
# Streams an Elasticsearch bulk (NDJSON) file to the Bulk API in fixed-size
# batches of lines.
#
# Non-empty lines of SOURCE_FILE are copied into TEMP_BULK_FILE; every
# BATCH_LINES lines the scratch file is POSTed to ES_URL, and any remainder
# is sent after the last line has been read.
#
# A batch counts as failed when curl itself fails, when the HTTP status is
# not 200, or when the response body reports "errors": true (the Bulk API
# answers 200 even if individual items were rejected). Failed batches do not
# stop the run; they are counted and reflected in the exit status.
#
# Exit status:
#   0  every batch was accepted
#   1  SOURCE_FILE or curl is missing, the temp file could not be created,
#      or at least one batch failed

set -u

# Configuration
SOURCE_FILE="processed_output.json" # source NDJSON file
TEMP_BULK_FILE="bulk.json"          # scratch file holding the current batch
ES_URL="localhost:9200/_bulk"       # Elasticsearch Bulk API endpoint
# Lines per request. "index" actions occupy two lines (action + document),
# so keep this even or a pair will be split across two requests.
BATCH_LINES=2

#######################################
# POSTs the current contents of TEMP_BULK_FILE and prints the outcome.
# Globals:   ES_URL, TEMP_BULK_FILE, RESPONSE_FILE (read)
# Arguments: $1 - number of lines in the batch (for the log message)
#            $2 - 1 if this is the trailing partial batch (default 0)
# Outputs:   progress and the response body on stdout
# Returns:   0 if the batch was accepted, 1 otherwise
#######################################
send_batch() {
  local count=$1
  local is_final=${2:-0}
  local kind="批量"
  local prefix=""
  local http_code curl_status response_content

  if ((is_final)); then
    kind="剩余"
    prefix="剩余数据"
  fi
  echo "=== 发送${kind}数据(行数:$count)==="

  # Start from an empty response file so that a failed transfer can never
  # show the body of an earlier batch.
  : > "$RESPONSE_FILE"

  # -o stores the body in RESPONSE_FILE, so stdout carries only the status
  # code written by -w ("000" when no HTTP response was received).
  # -S keeps curl's own error message visible despite -s.
  http_code=$(curl -sS -X POST "$ES_URL" \
    -H "Content-Type: application/json" \
    --data-binary "@$TEMP_BULK_FILE" \
    -w "%{http_code}" -o "$RESPONSE_FILE")
  curl_status=$?
  response_content=$(< "$RESPONSE_FILE")

  if ((curl_status != 0)); then
    echo "❌ ${prefix}请求失败,curl退出码:$curl_status,HTTP状态码:$http_code"
    echo "错误响应:$response_content"
    return 1
  fi

  if [[ "$http_code" != "200" ]]; then
    echo "❌ ${prefix}请求失败,HTTP状态码:$http_code"
    echo "错误响应:$response_content"
    return 1
  fi

  # Text match on the top-level "errors" flag of the bulk response; a JSON
  # parser would be stricter, but none is assumed to be installed.
  if grep -q '"errors"[[:space:]]*:[[:space:]]*true' "$RESPONSE_FILE"; then
    echo "❌ ${prefix}请求失败,HTTP状态码:$http_code,响应中errors为true(部分条目写入失败)"
    echo "错误响应:$response_content"
    return 1
  fi

  echo "✅ ${prefix}请求成功,HTTP状态码:$http_code"
  echo "响应内容:$response_content"
  return 0
}

# Removes the response file and empties the scratch batch file.
cleanup() {
  rm -f -- "$RESPONSE_FILE"
  : > "$TEMP_BULK_FILE"
}

# Preconditions: the source file must exist and curl must be installed.
if [[ ! -f "$SOURCE_FILE" ]]; then
  echo "错误:源文件 $SOURCE_FILE 不存在!"
  exit 1
fi

if ! command -v curl &> /dev/null; then
  echo "错误:curl未安装,请先安装curl!"
  exit 1
fi

# Unique response file instead of a fixed name in the working directory.
if ! RESPONSE_FILE=$(mktemp); then
  echo "错误:无法创建临时文件!"
  exit 1
fi
trap cleanup EXIT

line_count=0
failed_batches=0
: > "$TEMP_BULK_FILE"

# "|| [[ -n $line ]]" keeps a last line that has no trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Skip blank lines.
  if [[ -z "$line" ]]; then
    continue
  fi

  printf '%s\n' "$line" >> "$TEMP_BULK_FILE"
  line_count=$((line_count + 1))

  if ((line_count == BATCH_LINES)); then
    send_batch "$line_count" || failed_batches=$((failed_batches + 1))
    # Start the next batch from scratch.
    line_count=0
    : > "$TEMP_BULK_FILE"
    # Optional: throttle to reduce load on Elasticsearch.
    # sleep 0.5
  fi
done < "$SOURCE_FILE"

# Send whatever is left over when the line total is not a multiple of
# BATCH_LINES.
if ((line_count > 0)); then
  send_batch "$line_count" 1 || failed_batches=$((failed_batches + 1))
fi

echo "=== 所有数据处理完成 ==="
if ((failed_batches > 0)); then
  echo "❌ 失败批次数:$failed_batches"
  exit 1
fi
exit 0