当前位置:   article > 正文

es搜索,统计_es统计

es统计

记得好几年前用es做过标签画像统计,如今再看es已经很生疏了,而es也已经更新到了7.12版本。以前用的是TransportClient客户端,现在官方推出并推荐使用RestHighLevelClient客户端。

这几天用RestHighLevelClient时还是觉得比较方便的。现将一些基本常用功能记录一下。


1.初始化和关闭

public static RestHighLevelClient getClient(String host, int port) {
        LOGGER.info("Init ES!");
        client \= new RestHighLevelClient(
                RestClient.builder(new HttpHost(host, port, "http")));
        return client;
    }

    /**
     * Closes the shared static {@code client} if one has been created.
     *
     * <p>An {@link IOException} raised while closing is logged and not rethrown. The success
     * message is only logged after the client has actually been closed.
     */
    public static void closeES() {
        if (client == null) {
            return;
        }
        try {
            client.close();
            LOGGER.info("ES closed!");
        } catch (IOException e) {
            LOGGER.error("Failed to close ES client", e);
        }
    }

2.创建index以及mapping

public boolean createIndexMapping(RestHighLevelClient client, String indexName){
        LOGGER.info("create index and mapping ...");
        CreateIndexRequest createIndexRequest \= new CreateIndexRequest(indexName);

        createIndexRequest.settings(Settings.builder()
                .put("index.number\_of\_shards",1)
                .put("index.number\_of\_replicas",0));

        try {
            XContentBuilder xContentBuilder \= XContentFactory.jsonBuilder().startObject().startObject("properties")
                    .startObject("content")
                    .field("type", "text") // 数据类型
                    .field("index", "true") //默认
                    .field("analyzer", "ik\_max\_word")
                    .field("search\_analyzer", "ik\_smart")
                    .endObject()  
  
                    .startObject("date")
                    .field("type", "date") // 数据类型
                    .field("index", "true") //默认
                    .endObject()
                    .startObject("title")
                    .field("type", "text") // 数据类型
                    .field("index", "true") //默认
                    .field("analyzer", "ik\_max\_word")
                    .field("search\_analyzer", "ik\_smart")
                    .endObject()

                    .endObject()
                    .endObject();

            createIndexRequest.mapping(xContentBuilder);
            CreateIndexResponse createIndexResponse \= client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
            return createIndexResponse.isAcknowledged();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return false;
    }

3.获取一篇doc

public Map<String, Object> getOneMap(RestHighLevelClient client, String index, String id) {
        GetRequest getRequest \= new GetRequest(index, id);
        GetResponse getResponse \= null;
        try {
            getResponse \= client.get(getRequest, RequestOptions.DEFAULT);
            if(getResponse.isExists()) {
                return getResponse.getSource();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

4.删除多篇

/\*\*
     \* 批量删除
     \* @param client
     \* @param index
     \* @param ids
     \* @return
     \*/
    public Object deleteList(RestHighLevelClient client, String index, List<String> ids) {
        //构建批量删除请求
        DeleteByQueryRequest request = new DeleteByQueryRequest(index);
        IdsQueryBuilder queryBuilder \= new IdsQueryBuilder();
        for(String id: ids) {
            queryBuilder.addIds(id);
        }
        // 匹配所有
        request.setQuery(queryBuilder);
        BulkByScrollResponse response \= null;
        try {
            response \= client.deleteByQuery(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return JSONObject.toJSON(response);
    }

5.批量导入

/\*\*
     \* 批量导入
     \* @param client
     \* @param index
     \* @param list
     \* @return
     \*/
    public boolean insertDocByBulk(RestHighLevelClient client, String index, List<Doc> list) {
        //批量插入请求
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("10s");
        for(int i = 0; i < list.size(); i++) {
            Doc doc \= list.get(i);
            //这里必须每次都使用new IndexRequest(index,type),不然只会插入最后一条记录(这样插入不会覆盖已经存在的Id,也就是不能更新)
            Map<String, Object> kv = new HashMap<>();
            kv.put("id", doc.getId());
            kv.put("title", doc.getTitle());
            kv.put("content", doc.getContent());

            bulkRequest.add(new IndexRequest(index).id(String.valueOf(doc.getId())).source(kv));
            //或者
            //bulkRequest.add(new IndexRequest(index).id(item.getID()).source(JSON.toJSONString(doc), XContentType.JSON));
        }

        try {
            // 客户端返回
            BulkResponse responses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
            // responses.hasFailures(); // 是否失败,false表示成功!
            if(RestStatus.CREATED == responses.status()) {
                return true;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

6.批量更新

/\*\*
     \* 批量update
     \* @param client
     \* @param index
     \* @param list
     \* @return
     \*/
    public boolean updateDocByBulk(RestHighLevelClient client, String index, List<Doc> list) {
        //批量插入请求
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.timeout("10s");
        for(int i = 0; i < list.size(); i++) {
            Doc doc \= list.get(i);
            //这里必须每次都使用new IndexRequest(index,type),不然只会插入最后一条记录(这样插入不会覆盖已经存在的Id,也就是不能更新)
            Map<String, Object> kv = new HashMap<>();
            kv.put("id", doc.getId());
            kv.put("title", doc.getTitle());
            kv.put("content", doc.getContent());

            bulkRequest.add(new UpdateRequest().index(index).id(String.valueOf(doc.getId())).doc(kv));
        }

        try {
            // 客户端返回
            BulkResponse responses = client.bulk(bulkRequest, RequestOptions.DEFAULT);
            // responses.hasFailures(); // 是否失败,false表示成功!
            if(RestStatus.OK == responses.status()) {
                return true;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return false;
    }

7.全量搜索

7.1 利用scroll

/\*\*
     \* 全量搜索
     \* @param client
     \* @param index
     \* @param field1
     \* @param field2
     \* @param query
     \* @param size
     \* @param include
     \* @param exclude
     \* @return
     \*/
    public List<org.elasticsearch.search.SearchHit> searchByQueryScrollAll(RestHighLevelClient client, String index, String field1, String field2, String query, int size, String\[\] include, String\[\] exclude) {
        List<org.elasticsearch.search.SearchHit> result = CollectionUtil.newArrayList();
        final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1L));
        SearchRequest searchRequest \= new SearchRequest(index);
        searchRequest.scroll(scroll);
        SearchSourceBuilder searchSourceBuilder \= new SearchSourceBuilder();
        // 高亮显示
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        // 高亮标签
        highlightBuilder.preTags("<a style='color: #e4393c'>");
        highlightBuilder.postTags("</a>");
        // 高亮字段
        highlightBuilder.field(field2);
        //设置最多一次能够取出(size)笔数据,从第(size + 1)笔数据开始,将开启滚动查询。  (滚动查询也属于这一次查询,只不过因为一次查不完,分多次查)
        searchSourceBuilder.size(size);
        //searchSourceBuilder.sort("\_score", SortOrder.DESC);
        //socre相同,则按时间降序排序
        //searchSourceBuilder.sort("publish\_date", SortOrder.DESC);
        //高亮显示添加到构造器(不需要高亮显示则不添加)
        searchSourceBuilder.highlighter(highlightBuilder);
        // 多字段联合查询
        //searchSourceBuilder.query(QueryBuilders.multiMatchQuery(query, field1, field2));
        searchSourceBuilder.query(QueryBuilders.boolQuery()
                .should(QueryBuilders.matchQuery(field1, query))
                .must(QueryBuilders.matchQuery(field2, query)));
        searchSourceBuilder.fetchSource(include, exclude);
        searchRequest.source(searchSourceBuilder);
        SearchResponse searchResponse \= null;
        try {
            searchResponse \= client.search(searchRequest, RequestOptions.DEFAULT);
        } catch (IOException e) {
            e.printStackTrace();
        }
        String scrollId \= searchResponse.getScrollId();
        org.elasticsearch.search.SearchHit\[\] searchHits \= searchResponse.getHits().getHits();
        while (searchHits != null && searchHits.length > 0) {
            for(org.elasticsearch.search.SearchHit hit: searchHits) {
                //String highlightText = hit.getHighlightFields().get(field2).getFragments()\[0\].toString();
                result.add(hit);
            }
            SearchScrollRequest searchScrollRequest \= new SearchScrollRequest(scrollId);
            searchScrollRequest.scroll(scroll);
            try {
                searchResponse \= client.scroll(searchScrollRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
            scrollId \= searchResponse.getScrollId();
            searchHits \= searchResponse.getHits().getHits();
        }

        if(null != scrollId) {
            ClearScrollRequest clearScrollRequest \= new ClearScrollRequest();
            clearScrollRequest.addScrollId(scrollId);
            // 滚动完成后清除滚动上下文
            ClearScrollResponse clearScrollResponse = null;
            try {
                clearScrollResponse \= client.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
            //清除滚动是否成功
            boolean succeeded = clearScrollResponse.isSucceeded();
        }
        return result;
    }

7.2 利用search_after

/\*\*
     \* 全量搜索
     \* @param client
     \* @param index
     \* @param field1
     \* @param field2
     \* @param query
     \* @param size
     \* @param include
     \* @param exclude
     \* @return
     \*/
    public List<org.elasticsearch.search.SearchHit> searchByQuerySearchAfter(RestHighLevelClient client, String index, String field1, String field2, String query, int size, String\[\] include, String\[\] exclude) {
        List<org.elasticsearch.search.SearchHit> result = CollectionUtil.newArrayList();
        SearchRequest request \= new SearchRequest(index);
        SearchSourceBuilder searchSourceBuilder \= new SearchSourceBuilder();
        // 高亮显示
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        // 高亮标签
        highlightBuilder.preTags("<a style='color: #e4393c'>");
        highlightBuilder.postTags("</a>");
        // 高亮字段
        highlightBuilder.field(field2);
        QueryBuilder queryBuilder \= QueryBuilders.boolQuery()
                .should(QueryBuilders.matchQuery(field1, query))
                .must(QueryBuilders.matchQuery(field2, query));

        // searchSourceBuilder.query(QueryBuilders.matchQuery(field, query));
        searchSourceBuilder.query(queryBuilder);
        searchSourceBuilder.fetchSource(include, exclude);
        //每页显示条数
        searchSourceBuilder.size(size);
        // 需要唯一不重复的字段作为排序
        searchSourceBuilder.sort("\_id", SortOrder.DESC);
        //searchSourceBuilder.sort("\_score", SortOrder.DESC);
        //score相同,则按时间降序排序
        //searchSourceBuilder.sort("publish\_date", SortOrder.DESC);
        //高亮显示添加到构造器(不需要高亮显示则不添加)
        searchSourceBuilder.highlighter(highlightBuilder);
        //构造器添加到搜索请求
        request.source(searchSourceBuilder);
        //客户端返回
        SearchResponse response = null;
        try {
            response \= client.search(request, RequestOptions.DEFAULT);
            //搜索结果
            org.elasticsearch.search.SearchHit\[\] hits = response.getHits().getHits();
            while(hits.length > 0) {
                for(org.elasticsearch.search.SearchHit hit : hits) {
                    result.add(hit);
                }
                org.elasticsearch.search.SearchHit last \= hits\[hits.length - 1\];
                searchSourceBuilder.searchAfter(last.getSortValues());
                response \= client.search(request, RequestOptions.DEFAULT);
                hits \= response.getHits().getHits();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

8.统计分析

/\*\*
     \* 多field统计
     \* @param client
     \* @param index
     \* @param query
     \* @param field1
     \* @param field2
     \* @param aggFields
     \*/
    public static Map<String, Map<String, Long>> countDocByTermsAgg(RestHighLevelClient client, String index, String query, String field1, String field2, String ... aggFields) {
        SearchSourceBuilder searchSourceBuilder \= new SearchSourceBuilder();
        searchSourceBuilder.fetchSource(false);
        SearchRequest request \= new SearchRequest(index);
        QueryBuilder queryBuilder \= QueryBuilders.boolQuery()
                .should(QueryBuilders.matchQuery(field1, query))
                .must(QueryBuilders.matchQuery(field2, query));
        searchSourceBuilder.query(queryBuilder);
        Map<String, Map<String, Long>>  fieldAggMap = CollectionUtil.newLinkedHashMap();
        TermsAggregationBuilder aggregationBuilder;
        SearchResponse response \= null;
        for(String fieldName : aggFields) {
            aggregationBuilder \= AggregationBuilders.terms("agg\_name").field(fieldName);
            searchSourceBuilder.aggregation(aggregationBuilder);
            request.source(searchSourceBuilder);
            searchSourceBuilder.size(0);
            try {
                response \= client.search(request, RequestOptions.DEFAULT);
            } catch (IOException e) {
                e.printStackTrace();
            }
            Aggregations aggregations \= response.getAggregations();
            Terms byTopicAggregation \= aggregations.get("agg\_name");
            List<? extends Terms.Bucket> buckets = byTopicAggregation.getBuckets();
            Map<String, Long> bucketsFieldsAgg = CollectionUtil.newLinkedHashMap();
            buckets.forEach(b \->
                    bucketsFieldsAgg.put(b.getKeyAsString(), b.getDocCount())
            );
            fieldAggMap.put(fieldName, bucketsFieldsAgg);
        }
        return fieldAggMap;
    }
    
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205
  • 206
  • 207
  • 208
  • 209
  • 210
  • 211
  • 212
  • 213
  • 214
  • 215
  • 216
  • 217
  • 218
  • 219
  • 220
  • 221
  • 222
  • 223
  • 224
  • 225
  • 226
  • 227
  • 228
  • 229
  • 230
  • 231
  • 232
  • 233
  • 234
  • 235
  • 236
  • 237
  • 238
  • 239
  • 240
  • 241
  • 242
  • 243
  • 244
  • 245
  • 246
  • 247
  • 248
  • 249
  • 250
  • 251
  • 252
  • 253
  • 254
  • 255
  • 256
  • 257
  • 258
  • 259
  • 260
  • 261
  • 262
  • 263
  • 264
  • 265
  • 266
  • 267
  • 268
  • 269
  • 270
  • 271
  • 272
  • 273
  • 274
  • 275
  • 276
  • 277
  • 278
  • 279
  • 280
  • 281
  • 282
  • 283
  • 284
  • 285
  • 286
  • 287
  • 288
  • 289
  • 290
  • 291
  • 292
  • 293
  • 294
  • 295
  • 296
  • 297
  • 298
  • 299
  • 300
  • 301
  • 302
  • 303
  • 304
  • 305
  • 306
  • 307
  • 308
  • 309
  • 310
  • 311
  • 312
  • 313
  • 314
  • 315
  • 316
  • 317
  • 318
  • 319
  • 320
  • 321
  • 322
  • 323
  • 324
  • 325
  • 326
  • 327
  • 328
  • 329
  • 330
  • 331
  • 332
  • 333
  • 334
  • 335
  • 336
  • 337
  • 338
  • 339
  • 340
  • 341
  • 342
  • 343
  • 344
  • 345
  • 346
  • 347
  • 348
  • 349
  • 350
  • 351
  • 352
  • 353
  • 354
  • 355
  • 356
  • 357
  • 358
  • 359
  • 360
  • 361
  • 362
  • 363
  • 364
  • 365
  • 366
  • 367
  • 368
  • 369
  • 370
  • 371
  • 372
  • 373
  • 374
本文内容由网友自发贡献,转载请注明出处:https://www.wpsshop.cn/w/正经夜光杯/article/detail/994887
推荐阅读
相关标签
  

闽ICP备14008679号