当前位置:   article > 正文

Elasticsearch 拼音插件 elasticsearch-analysis-pinyin + IK 分词器

elasticsearch 拼音插件

Elasticsearch 与 Kibana 的部署过程从略(需预先安装 IK 分词器插件和 elasticsearch-analysis-pinyin 插件)

创建索引

PUT /nba_20220101
{
	"mappings": {
		"properties": {
			"age": {
				"type": "integer"
			},
			"birthDay": {
				"type": "date"
			},
			"birthDayStr": {
				"type": "keyword"
			},
			"code": {
				"type": "text"
			},
			"country": {
				"type": "keyword"
			},
			"countryEn": {
				"type": "keyword"
			},
			"displayAffiliation": {
				"type": "text"
			},
			"displayName": {
				"type": "text",
				"analyzer": "ik_max_word_pinyin", 
				  "fields" : {
					"suggest" : {
					  "type" : "completion",
					  "analyzer" : "ik_smart_pinyin",
					  "preserve_separators" : true,
					  "preserve_position_increments" : true,
					  "max_input_length" : 50
					}
				  }
			},
			"displayNameEn": {
				"type": "text"
			},
			"draft": {
				"type": "long"
			},
			"heightValue": {
				"type": "float"
			},
			"jerseyNo": {
				"type": "keyword"
			},
			"playYear": {
				"type": "long"
			},
			"playerId": {
				"type": "keyword"
			},
			"position": {
				"type": "text"
			},
			"schoolType": {
				"type": "text"
			},
			"teamCity": {
				"type": "text"
			},
			"teamCityEn": {
				"type": "text"
			},
			"teamConference": {
				"type": "keyword"
			},
			"teamConferenceEn": {
				"type": "keyword"
			},
			"teamName": {
				"type": "keyword"
			},
			"teamNameEn": {
				"type": "keyword"
			},
			"weight": {
				"type": "text"
			}
		}
	},
    "settings" : {
      "index" : {
        "analysis" : {
          "analyzer" : {
            "default" : {
              "type" : "ik_max_word"
            }
          }
        }
      },
	  "analysis": {
            "analyzer": {
                "ik_smart_pinyin": {
                    "type": "custom",
                    "tokenizer": "ik_smart",
                    "filter": ["my_pinyin"]
                },
                "ik_max_word_pinyin": {
                    "type": "custom",
                    "tokenizer": "ik_max_word",
                    "filter": ["my_pinyin"]
                }
            },
            "filter": {
                "my_pinyin": {
                    "type" : "pinyin",
                    "keep_separate_first_letter" : false,
                    "keep_full_pinyin" : true,
                    "keep_original" : true,
                    "limit_first_letter_length" : 16,
                    "lowercase" : true,
                    "remove_duplicated_term" : true
                }
            }
        }
    }
}

自定义 ik_smart_pinyin、ik_max_word_pinyin 分词器

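  • 分别基于 ik_max_word + my_pinyin、ik_smart + my_pinyin 创建自定义分词器 ik_max_word_pinyin 和 ik_smart_pinyin
  • my_pinyin 是自定义的拼音分词过滤器(type 为 pinyin)
  • displayName 字段使用 ik_max_word_pinyin 分词器,同时支持拼音和汉字检索;并通过子字段 displayName.suggest(completion 类型,使用 ik_smart_pinyin 分词器)提供检索补全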

pinyin 插件可选参数说明

| 配置参数 | 说明 | 示例 |
| --- | --- | --- |
| keep_first_letter | 保留拼音首字母,默认值:true | 刘德华 > ldh |
| keep_separate_first_letter | 启用此选项后,将单独保留每个字的首字母,默认值:false | 刘德华 > l, d, h(注意:由于词频太高,查询结果可能太模糊) |
| limit_first_letter_length | 设置 first_letter 结果的最大长度,默认值:16 | |
| keep_full_pinyin | 启用此选项后,保留每个字的全拼,默认值:true | 刘德华 > [liu, de, hua] |
| keep_joined_full_pinyin | 启用此选项后,保留连接在一起的全拼,默认值:false | 刘德华 > [liudehua] |
| keep_none_chinese | 结果中保留非中文字母或数字,默认值:true | 刘德华AT2016 > ldhat2016(注意:keep_none_chinese 需与 keep_none_chinese_in_first_letter 同时启用) |
| keep_none_chinese_in_first_letter | 首字母结果中保留非中文字母,默认值:true | 刘德华AT2016 > ldhat2016 |
| keep_none_chinese_together | 保持非中文字母在一起,默认值:true | DJ音乐家 > DJ, yin, yue, jia;当设置为 false 时:DJ音乐家 > D, J, yin, yue, jia |
| keep_none_chinese_in_joined_full_pinyin | 在连接的全拼中保留非中文字母,默认值:false | 刘德华2016 > liudehua2016 |
| none_chinese_pinyin_tokenize | 如果非中文字母是拼音,则将其拆分为单独的拼音词,默认值:true | liudehuaalibaba13zhuanghan > liu, de, hua, a, li, ba, ba, 13, zhuang, han(注意:应首先启用 keep_none_chinese 和 keep_none_chinese_together) |
| keep_original | 启用此选项时,也将保留原始输入,默认值:false | |
| lowercase | 将非汉字字母转为小写,默认值:true | |
| trim_whitespace | 默认值:true | |
| remove_duplicated_term | 启用此选项时,将删除重复的词项以节省索引空间,默认值:false | de的 > de(注意:位置相关查询可能会受到影响) |
| ignore_pinyin_offset | 6.0 以后严格限制 offset,不允许 token 重叠;启用此参数后,重叠的 token 会忽略 offset,默认值:true。注意:此时所有与 position 相关的 query 或 highlight 都会出错,应使用 multi fields,针对不同的查询目的使用不同的设置;如果需要偏移量,请将其设置为 false | |

导入数据

POST /nba_20220101/_doc/566
{
	"countryEn": "Croatia",
	"teamName": "快船",
	"birthDay": 858661200000,
	"country": "克罗地亚",
	"teamCityEn": "LA",
	"code": "ivica_zubac",
	"displayAffiliation": "Croatia",
	"displayName": "伊维察 祖巴茨哥哥",
	"schoolType": "",
	"teamConference": "西部",
	"teamConferenceEn": "Western",
	"weight": "108.9 公斤",
	"teamCity": "洛杉矶",
	"playYear": 3,
	"jerseyNo": "40",
	"teamNameEn": "Clippers",
	"draft": 2016,
	"displayNameEn": "Ivica Zubac",
	"heightValue": 2.16,
	"birthDayStr": "1997-03-18",
	"position": "中锋",
	"age": 22,
	"playerId": "1627826"
}

校验汉字分词是否有效

GET /nba_20220101/_search
{
  "query": {
    "match": {
      "displayName": "伊维察"
    }
  }
}

(原文此处为汉字检索结果截图,图片缺失)

校验拼音分词是否有效

GET /nba_20220101/_search
{
  "query": {
    "match": {
      "displayName": "yi wei"
    }
  }
}

(原文此处为拼音检索结果截图,图片缺失)

校验汉字补全是否生效

POST /nba_20220101/_search
{
	"suggest": {
		"my-suggestion": {
			"text": "伊维",
			"completion": {
				"field": "displayName.suggest"
			}
		}
	}
}

(原文此处为汉字补全结果截图,图片缺失)

校验拼音补全是否生效

POST /nba_20220101/_search
{
	"suggest": {
		"my-suggestion": {
			"text": "yi wei",
			"completion": {
				"field": "displayName.suggest"
			}
		}
	}
}

(原文此处为拼音补全结果截图,图片缺失)

注意:如果拼音分词结果与中文分词结果不一致,可以参考下方的 my_pinyin 配置(关闭首字母和逐字全拼,只保留原文和连接在一起的全拼):

(原文此处为截图,图片缺失)

 "analysis": {
            "analyzer": {
                "ik_smart_pinyin": {
                    "type": "custom",
                    "tokenizer": "ik_smart",
                    "filter": ["my_pinyin"]
                },
                "ik_max_word_pinyin": {
                    "type": "custom",
                    "tokenizer": "ik_max_word",
                    "filter": ["my_pinyin"]
                }
            },
            "filter": {
                "my_pinyin": {
                   "type": "pinyin",
					 "keep_first_letter": false,
					  "keep_separate_first_letter": false,
					  "keep_full_pinyin": false,
					  "keep_original": true,
					  "limit_first_letter_length": 16,
					  "lowercase": true,
					  "remove_duplicated_term": true,
					  "keep_joined_full_pinyin": true,
					  "keep_none_chinese_together": true,
					  "none_chinese_pinyin_tokenize": false,
					  "keep_none_chinese":true,
					  "keep_none_chinese_in_joined_full_pinyin":true
                }
            }
        }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/木道寻08/article/detail/966779
推荐阅读
相关标签
  

闽ICP备14008679号