ElasticSearch（3）

最新推荐文章于 2023-06-05 01:53:47 发布

耗子肉

最新推荐文章于 2023-06-05 01:53:47 发布

阅读量314

点赞数

分类专栏： Elasticsearch bug处理

本文链接：https://blog.csdn.net/haozi_rou/article/details/104771151

版权

本文介绍了如何使用ElasticSearch导入tmdb电影数据，并探讨了各种查询技术，包括match、and查询、短语查询、多字段查询及布尔查询。讨论了评分机制，如TF/IDF和TFNORM，以及如何通过调整字段权重和tie_breaker优化查询结果。

摘要由CSDN通过智能技术生成

导入tmdb

TMDB 是一个电影数据集，它的数据量很大，非常适合用来做 ES 实践。直接在谷歌搜索 “kaggle tmdb” 即可找到并下载。

首先还是要在es上建立mapping：

PUT /movie
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "title":{"type": "text","analyzer": "english"},
      "tagline":{"type": "text","analyzer": "english"},
      "release_date":{"type": "date","format": "8yyyy/MM/dd||yyyy/M/dd||yyyy/MM/d||yyyy/M/d"},
      "popularity":{"type": "double"},
      "overview":{"type": "text","analyzer": "english"},
      "cast":{
        "type": "object",
        "properties": {
          "character":{"type":"text","analyzer":"standard"},
          "name":{"type":"text","analyzer":"standard"}
        }
        
      }
    }
  }
}

接下来创建一个 Spring Boot 程序，先在 pom.xml 中引入以下依赖：

        <!-- Maven dependencies for the import/query demo application. -->
        <!-- Spring Boot base starter. -->
        <dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter</artifactId>
		</dependency>
		<!-- Spring Boot web starter; the demo exposes its operations through a @Controller. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>
		<!-- Elasticsearch artifacts below are all pinned to the same version (7.6.1). -->
		<dependency>
			<groupId>org.elasticsearch</groupId>
			<artifactId>elasticsearch</artifactId>
			<version>7.6.1</version>
		</dependency>
		<!-- Transport client artifact; ESConfig builds a TransportClient from it. -->
		<dependency>
			<groupId>org.elasticsearch.client</groupId>
			<artifactId>transport</artifactId>
			<version>7.6.1</version>
		</dependency>
		<!-- Test-scoped Spring Boot testing support. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>
		<!-- NOTE(review): presumably the Netty 4 transport module needed at runtime by the transport client; confirm it is not already pulled in transitively. -->
		<dependency>
			<groupId>org.elasticsearch.plugin</groupId>
			<artifactId>transport-netty4-client</artifactId>
			<version>7.6.1</version>
		</dependency>
		<!-- CSV parsing; ESController.importdata reads the movie CSV with CSVReader. -->
		<dependency>
			<groupId>com.opencsv</groupId>
			<artifactId>opencsv</artifactId>
			<version>4.2</version>
		</dependency>
		<!-- JSON parsing; ESController.importdata uses JSONArray/JSONObject for the cast column. -->
		<!-- NOTE(review): 1.2.58 is an old 1.2.x release; check the published fastjson security advisories before using this outside a demo. -->
		<dependency>
			<groupId>com.alibaba</groupId>
			<artifactId>fastjson</artifactId>
			<version>1.2.58</version>
		</dependency>

/**
 * Spring configuration that exposes an Elasticsearch {@link TransportClient} bean.
 *
 * <p>The client is configured for the cluster named by {@link #CLUSTER_NAME} and is
 * given one transport address per entry in {@link #NODE_PORTS}, all on {@link #NODE_HOST}.
 */
@Configuration
public class ESConfig {

    /** Value of the {@code cluster.name} setting passed to the client. */
    private static final String CLUSTER_NAME = "dianping-app";

    /** Host shared by every configured transport address. */
    private static final String NODE_HOST = "127.0.0.1";

    /** Transport ports registered with the client, in registration order. */
    private static final int[] NODE_PORTS = {9300, 9301, 9302};

    /**
     * Builds the transport client and registers every configured node address.
     *
     * @return a client with all addresses from {@link #NODE_PORTS} registered; never {@code null}
     * @throws IllegalStateException if the host cannot be resolved or an address cannot be
     *     registered; the partially built client is closed before the exception is thrown
     */
    @Bean
    public TransportClient getClient() {
        Settings settings = Settings.builder()
                .put("cluster.name", CLUSTER_NAME)
                .build();
        TransportClient transportClient = new PreBuiltTransportClient(settings);
        try {
            InetAddress host = InetAddress.getByName(NODE_HOST);
            for (int port : NODE_PORTS) {
                transportClient.addTransportAddress(new TransportAddress(host, port));
            }
            return transportClient;
        } catch (java.net.UnknownHostException | RuntimeException e) {
            // Fail fast instead of handing Spring a null bean, and release the
            // resources the client acquired when it was constructed.
            transportClient.close();
            throw new IllegalStateException(
                    "Failed to configure Elasticsearch transport client for cluster " + CLUSTER_NAME, e);
        }
    }
}

@Controller("/es")
@RequestMapping("/es")
public class ESController {

    @Autowired
    private TransportClient transportClient;

    /**
     * Fetches a single document from the {@code movie} index by its id.
     *
     * @param id document id, bound from the {@code id} request parameter
     * @return a 200 response whose body is the document source, or a 404 response
     *     with no body when the index holds no document with that id
     */
    @RequestMapping("/get")
    @ResponseBody
    public ResponseEntity get(@RequestParam(name = "id") Integer id) {
        GetResponse getResponse = transportClient.prepareGet("movie", null, id.toString()).get();
        if (!getResponse.isExists()) {
            // A missing document has no source; report it as 404 rather than
            // as a 200 with an empty body.
            return new ResponseEntity<>(HttpStatus.NOT_FOUND);
        }
        return new ResponseEntity<>(getResponse.getSource(), HttpStatus.OK);
    }


    @RequestMapping("/importdata")
    @ResponseBody
    public ResponseEntity importdata() throws IOException {
        //批量插入
        BulkRequest bulkRequest = new BulkRequest();
        int lineId = 0;
        InputStreamReader in = new InputStreamReader(new FileInputStream("./tmdb_5000_movies.csv"), Charset.forName("UTF-8"));
        CSVReader reader = new CSVReader(in, ',');
        List<String[]> allRecords = reader.readAll();
        for (String[] records : allRecords) {
            lineId++;
            if(lineId == 1){
                continue;
            }
            try{
                JSONArray castJsonArray = JSONArray.parseArray(records[20]);
                String character = (String) castJsonArray.getJSONObject(0).get("character");
                String name = (String) castJsonArray.getJSONObject(0).get("name");
                JSONObject cast = new JSONObject();
                cast.put("character",character);
                cast.put("name",name);
                String date = records[11];
                if(date == null || date.equals("")){
                    date = "1970/01/01";
                }
                //IndexRequest一条索引记录
                bulkRequest.add(new IndexRequest("movie", "_doc", String.valueOf(lineId-1)).source(XContentType.JSON,
                        "title", records[17],
                        "tagline",records[16],
                        "release_date",date,
                        "popularity",records[8],
                        "cast",cast,