Elasticsearch 重建索引 数据迁移

Elasticsearch 重建索引 数据迁移

大家都知道,es 索引中已有字段的映射(mapping)在索引创建完成之后就不可以再修改了,比如更改已有字段的类型或者分词方式等(新增字段是允许的)。那么随着业务数据量的发展,可能会出现需要修改索引,或者说叫做重建索引的情况,那么这个时候应该怎么操作呢?本文主要就这个问题进行讨论处理。

处理流程

整体的重建索引的处理流程就是,先创建一个临时索引,将原始索引中的数据迁移到临时索引,然后再删除原始索引,重新创建原始索引后,再将临时索引中的数据迁回到重新创建的原始索引,从而完成索引的重建操作。

创建临时索引

在创建索引之前,我们先看一下原始的 es 索引结构,在 kibana 开发工具命令行页面执行命令

GET crm_meiqia_conversation/_mapping

这里我需要将字段 convId 的字段类型 改为 text ,那么这个时候我就需要创建一个临时索引 crm_meiqia_conversation_tmp 将字段 convId 的字段类型改为 text ,原始 convId 属性如下图

整个执行命令代码如下

PUT /crm_meiqia_conversation_tmp
{
  "mappings" : {
      "meiqiaConversation" : {
        "properties" : {
          "convId" : {
            "type" : "text"
          },
          "enterpriseId" : {
            "type" : "long"
          },
          "devClientId" : {
            "type" : "text"
          },
          "pageFromUrl" : {
            "type" : "text"
          },
          "pageLandUrl" : {
            "type" : "text"
          },
          "pageLandTitle" : {
            "type" : "text"
          },
          "pageConvUrl" : {
            "type" : "text"
          },
          "pageConvTitle" : {
            "type" : "text"
          },
          "searchEngineName" : {
            "type" : "text"
          },
          "searchEngineKw" : {
            "type" : "text"
          },
          "visitorIp" : {
            "type" : "text"
          },
          "visitorLocation" : {
            "type" : "text"
          },
          "visitorOs" : {
            "type" : "text"
          },
          "visitorBrowser" : {
            "type" : "text"
          },
          "visitorTags" : {
            "type" : "text"
          },
          "clientId" : {
            "type" : "long"
          },
          "agentAccount" : {
            "type" : "text"
          },
          "agentName" : {
            "type" : "text"
          },
          "agentId" : {
            "type" : "text"
          },
          "agentNickName" : {
            "type" : "text"
          },
          "groupId" : {
            "type" : "long"
          },
          "groupName" : {
            "type" : "text"
          },
          "convStartTm" : {
            "type" : "long"
          },
          "convStartDate" : {
            "type" : "date"
          },
          "convEndTm" : {
            "type" : "long"
          },
          "convEndDate" : {
            "type" : "date"
          },
          "convFirstRespWaitInSecs" : {
            "type" : "long"
          },
          "convAgentMsgCount" : {
            "type" : "long"
          },
          "convVisitorMsgCount" : {
            "type" : "long"
          },
          "convQualityGrade" : {
            "type" : "text"
          },
          "convLeads" : {
            "type" : "text"
          },
          "commentLevel" : {
            "type" : "long"
          },
          "commentContent" : {
            "type" : "text"
          },
          "platform" : {
            "type" : "text"
          },
          "summaryContent" : {
            "type" : "text"
          },
          "summaryUpdateAt" : {
            "type" : "text"
          },
          "sourceType" : {
            "type" : "text"
          },
          "sourceField" : {
            "type" : "text"
          },
          "agentRespDuration" : {
            "type" : "long"
          },
          "effective" : {
            "type" : "text"
          },
          "missed" : {
            "type" : "text"
          },
          "converseDuration" : {
            "type" : "long"
          },
          "appName" : {
            "type" : "text"
          },
          "mainChannel" : {
            "type" : "text"
          },
          "mainChannelName" : {
            "type" : "text"
          },
          "subChannel" : {
            "type" : "text"
          },
          "subChannelName" : {
            "type" : "text"
          },
          "searchEngine" : {
            "type" : "text"
          },
          "clientInfo" : {
            "properties" : {
              "address" : {
                "type" : "text"
              },
              "age" : {
                "type" : "long"
              },
              "channelName" : {
                "type" : "text"
              },
              "comment" : {
                "type" : "text"
              },
              "contact" : {
                "type" : "text"
              },
              "convId" : {
                "type" : "long"
              },
              "email" : {
                "type" : "text"
              },
              "enterpriseId" : {
                "type" : "long"
              },
              "followSource" : {
                "type" : "text"
              },
              "gender" : {
                "type" : "text"
              },
              "infoId" : {
                "type" : "long"
              },
              "jijiaoCity" : {
                "type" : "text"
              },
              "jijiaoDistrict" : {
                "type" : "text"
              },
              "jijiaoLevel" : {
                "type" : "text"
              },
              "jijiaoProvince" : {
                "type" : "text"
              },
              "mTrackId" : {
                "type" : "text"
              },
              "name" : {
                "type" : "text"
              },
              "openid" : {
                "type" : "text"
              },
              "qq" : {
                "type" : "text"
              },
              "sourceName" : {
                "type" : "text"
              },
              "tel" : {
                "type" : "text"
              },
              "trackId" : {
                "type" : "text"
              },
              "uid" : {
                "type" : "text"
              },
              "vid" : {
                "type" : "text"
              },
              "visitorName" : {
                "type" : "text"
              },
              "weibo" : {
                "type" : "text"
              },
              "weixin" : {
                "type" : "text"
              },
              "appChannel" : {
                "type" : "text"
              }
            }
          },
          "convContent" : {
            "properties" : {
              "contentId" : {
                "type" : "long"
              },
              "convId" : {
                "type" : "long"
              },
              "convFrom" : {
                "type" : "text"
              },
              "timestamp" : {
                "type" : "long"
              },
              "content" : {
                "type" : "text",
                "analyzer":"standard"
              },
              "remoteContent" : {
                "type" : "text"
              },
              "convType" : {
                "type" : "text"
              }
            }
          },
          "convTag" : {
            "properties" : {
              "tagId" : {
                "type" : "long"
              },
              "convId" : {
                "type" : "long"
              },
              "level" : {
                "type" : "long"
              },
              "value" : {
                "type" : "text"
              }
            }
          }
        }
      }
  },
  "settings" : {
      "number_of_shards":2, 
      "number_of_replicas" : 1,
      "refresh_interval":"1s"
  }
}

在 kibana 工具页面点击执行按钮

这里可以看到执行命令报错 400 根据提示信息来看 说明当前 es 中已经存在索引 crm_meiqia_conversation_tmp ,那么执行删除索引命令,删除后再执行刚才创建临时索引命令

DELETE /crm_meiqia_conversation_tmp

再次执行创建临时索引命令,执行成功

数据迁移

临时索引创建完成之后,我们就可以将原始索引中的数据先迁移到临时索引中,通过 ES 提供的 _reindex API 进行数据复制迁移,执行命令

POST _reindex
{  
  "source": {  
    "index": "crm_meiqia_conversation",
    "size":500
  },  
  "dest": {  
    "index": "crm_meiqia_conversation_tmp"  
  }
}

或者 异步迁移数据(加上 wait_for_completion=false 后请求会立即返回一个 task id,之后可以通过 GET _tasks/<task_id> 查看迁移进度和结果)

POST _reindex?wait_for_completion=false
{  
  "source": {  
    "index": "crm_meiqia_conversation",
    "size":500
  },  
  "dest": {  
    "index": "crm_meiqia_conversation_tmp"  
  }
  
}

其中,source 对应的是原始索引,dest 对应的是新建的临时索引,source 中的参数 size 表示每批次处理的数据量为 500 条,按批次循环执行直到数据迁移复制结束。如果不指定该参数,_reindex 默认每批次处理 1000 条,迁移成功如图

这个时候我们再来看一下原始索引 crm_meiqia_conversation 中的数据总数与临时索引 crm_meiqia_conversation_tmp 中的数据总数是否一致,执行命令

GET crm_meiqia_conversation/_count
GET crm_meiqia_conversation_tmp/_count

执行结果如图

那么这样就完成了数据从原始索引迁移复制到临时索引的操作。

重建索引

这个时候就需要执行命令删除原始索引 crm_meiqia_conversation ,然后按照临时索引的 创建语句 创建新的索引,最后再将临时索引中的数据 迁移复制到 新建的原始索引中去,执行命令

# 删除原始索引
DELETE /crm_meiqia_conversation
# 创建更改字段后的新的原始索引 
PUT /crm_meiqia_conversation
{
  "mappings" : {
      "meiqiaConversation" : {
        "properties" : {
          "convId" : {
            "type" : "text"
          },
          "enterpriseId" : {
            "type" : "long"
          },
          "devClientId" : {
            "type" : "text"
          },
          "pageFromUrl" : {
            "type" : "text"
          },
          "pageLandUrl" : {
            "type" : "text"
          },
          "pageLandTitle" : {
            "type" : "text"
          },
          "pageConvUrl" : {
            "type" : "text"
          },
          "pageConvTitle" : {
            "type" : "text"
          },
          "searchEngineName" : {
            "type" : "text"
          },
          "searchEngineKw" : {
            "type" : "text"
          },
          "visitorIp" : {
            "type" : "text"
          },
          "visitorLocation" : {
            "type" : "text"
          },
          "visitorOs" : {
            "type" : "text"
          },
          "visitorBrowser" : {
            "type" : "text"
          },
          "visitorTags" : {
            "type" : "text"
          },
          "clientId" : {
            "type" : "long"
          },
          "agentAccount" : {
            "type" : "text"
          },
          "agentName" : {
            "type" : "text"
          },
          "agentId" : {
            "type" : "text"
          },
          "agentNickName" : {
            "type" : "text"
          },
          "groupId" : {
            "type" : "long"
          },
          "groupName" : {
            "type" : "text"
          },
          "convStartTm" : {
            "type" : "long"
          },
          "convStartDate" : {
            "type" : "date"
          },
          "convEndTm" : {
            "type" : "long"
          },
          "convEndDate" : {
            "type" : "date"
          },
          "convFirstRespWaitInSecs" : {
            "type" : "long"
          },
          "convAgentMsgCount" : {
            "type" : "long"
          },
          "convVisitorMsgCount" : {
            "type" : "long"
          },
          "convQualityGrade" : {
            "type" : "text"
          },
          "convLeads" : {
            "type" : "text"
          },
          "commentLevel" : {
            "type" : "long"
          },
          "commentContent" : {
            "type" : "text"
          },
          "platform" : {
            "type" : "text"
          },
          "summaryContent" : {
            "type" : "text"
          },
          "summaryUpdateAt" : {
            "type" : "text"
          },
          "sourceType" : {
            "type" : "text"
          },
          "sourceField" : {
            "type" : "text"
          },
          "agentRespDuration" : {
            "type" : "long"
          },
          "effective" : {
            "type" : "text"
          },
          "missed" : {
            "type" : "text"
          },
          "converseDuration" : {
            "type" : "long"
          },
          "appName" : {
            "type" : "text"
          },
          "mainChannel" : {
            "type" : "text"
          },
          "mainChannelName" : {
            "type" : "text"
          },
          "subChannel" : {
            "type" : "text"
          },
          "subChannelName" : {
            "type" : "text"
          },
          "searchEngine" : {
            "type" : "text"
          },
          "clientInfo" : {
            "properties" : {
              "address" : {
                "type" : "text"
              },
              "age" : {
                "type" : "long"
              },
              "channelName" : {
                "type" : "text"
              },
              "comment" : {
                "type" : "text"
              },
              "contact" : {
                "type" : "text"
              },
              "convId" : {
                "type" : "long"
              },
              "email" : {
                "type" : "text"
              },
              "enterpriseId" : {
                "type" : "long"
              },
              "followSource" : {
                "type" : "text"
              },
              "gender" : {
                "type" : "text"
              },
              "infoId" : {
                "type" : "long"
              },
              "jijiaoCity" : {
                "type" : "text"
              },
              "jijiaoDistrict" : {
                "type" : "text"
              },
              "jijiaoLevel" : {
                "type" : "text"
              },
              "jijiaoProvince" : {
                "type" : "text"
              },
              "mTrackId" : {
                "type" : "text"
              },
              "name" : {
                "type" : "text"
              },
              "openid" : {
                "type" : "text"
              },
              "qq" : {
                "type" : "text"
              },
              "sourceName" : {
                "type" : "text"
              },
              "tel" : {
                "type" : "text"
              },
              "trackId" : {
                "type" : "text"
              },
              "uid" : {
                "type" : "text"
              },
              "vid" : {
                "type" : "text"
              },
              "visitorName" : {
                "type" : "text"
              },
              "weibo" : {
                "type" : "text"
              },
              "weixin" : {
                "type" : "text"
              },
              "appChannel" : {
                "type" : "text"
              }
            }
          },
          "convContent" : {
            "properties" : {
              "contentId" : {
                "type" : "long"
              },
              "convId" : {
                "type" : "long"
              },
              "convFrom" : {
                "type" : "text"
              },
              "timestamp" : {
                "type" : "long"
              },
              "content" : {
                "type" : "text",
                "analyzer":"standard"
              },
              "remoteContent" : {
                "type" : "text"
              },
              "convType" : {
                "type" : "text"
              }
            }
          },
          "convTag" : {
            "properties" : {
              "tagId" : {
                "type" : "long"
              },
              "convId" : {
                "type" : "long"
              },
              "level" : {
                "type" : "long"
              },
              "value" : {
                "type" : "text"
              }
            }
          }
        }
      }
  },
  "settings" : {
      "number_of_shards":2, 
      "number_of_replicas" : 1,
      "refresh_interval":"1s"
  }
}
# 迁移复制数据 临时索引》》》新的原始索引
POST _reindex
{  
  "source": {  
    "index": "crm_meiqia_conversation_tmp",
    "size":500
  },  
  "dest": {  
    "index": "crm_meiqia_conversation"  
  }
}

最后执行成功后,完成本次关于 索引 crm_meiqia_conversation 的更改字段属性 的操作

写在最后

其实对于 es 更改索引字段的操作,确实比较费劲,需要先创建临时索引,转移复制数据后,删除原始索引,再创建新的索引,并把临时索引的数据再迁移回新的索引中。所以在创建 es 索引之初就需要综合考量,将字段的属性设计以及索引结构设计做到准确,避免后续再出现这种费时费力的情况。另外如果待迁移索引的数据量比较大的话,来回迁移数据除了耗时以外,还会需要一个较大的磁盘空间才能完成操作,不然会报磁盘不足的错误提示的。

相关推荐
梦幻通灵1 小时前
ES分词环境实战
大数据·elasticsearch·搜索引擎
Elastic 中国社区官方博客1 小时前
Elasticsearch 中的热点以及如何使用 AutoOps 解决它们
大数据·运维·elasticsearch·搜索引擎·全文检索
小黑屋说YYDS7 小时前
ElasticSearch7.x入门教程之索引概念和基础操作(三)
elasticsearch
Java 第一深情9 小时前
Linux上安装单机版ElasticSearch6.8.1
linux·elasticsearch·全文检索
KevinAha1 天前
Elasticsearch 6.8 分析器
elasticsearch
wuxingge1 天前
elasticsearch7.10.2集群部署带认证
运维·elasticsearch
Elastic 中国社区官方博客1 天前
Elasticsearch:如何部署文本嵌入模型并将其用于语义搜索
大数据·人工智能·elasticsearch·搜索引擎·ai·全文检索
Dreams°1231 天前
【大数据测试 Elasticsearch 的标准--超详细篇】
大数据·elasticsearch·jenkins
鸠摩智首席音效师1 天前
如何在 Elasticsearch 中配置 SSL / TLS ?
elasticsearch·ssl
fishjam2 天前
[开源重构]Search(Elasticsearch/OpenSearch) Sync Tool
elasticsearch·重构·开源