From 4fdca4bf68badecb4f9e2c46c775b2ad175d7ab2 Mon Sep 17 00:00:00 2001 From: Anatolii Karlov Date: Mon, 12 Feb 2024 12:41:33 +0200 Subject: [PATCH] OPS-407: refactor search query, use keywords (#49) * OPS-407: refactor search query, use keywords * OPS-407: refactor search query, use keywords * OPS-407: refactor search query, use keywords --- README.md | 32 ++ .../constant/OpenSearchConstants.java | 6 +- .../vality/deanonimus/db/SearchDaoImpl.java | 109 ++--- .../java/dev/vality/deanonimus/ReadTest.java | 66 ++++ src/test/resources/index_model.json | 371 ++++++++++++++++++ 5 files changed, 510 insertions(+), 74 deletions(-) create mode 100644 src/test/resources/index_model.json diff --git a/README.md b/README.md index d162e0e..089a684 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,34 @@ # deanonimus Сервис для полнотекстового поиска party + +актуальные примеры с spring data elastic + opensearch + +https://github.com/opensearch-project/spring-data-opensearch/blob/main/spring-data-opensearch-examples/spring-boot-gradle/src/main/java/org/opensearch/data/example/service/MarketplaceInitializer.java +https://github.com/M-Razavi/Spring-Data-OpenSearch-Example + +полезные ссылки +https://opensearch.org/docs/latest/query-dsl/full-text/index/ +https://opensearch.org/docs/latest/query-dsl/full-text/multi-match/ + +сниппеты +```java +getMappingResponse.get("party").mappings().properties().get("id").text().fields().get("keyword").keyword() + +search.hits().hits().get(0).source() + +var criteria = QueryBuilders.boolQuery() + .must(QueryBuilders.matchQuery("authorName", author)) + .must(QueryBuilders.matchQuery("title", title)); + +SearchRequest searchRequest = new SearchRequest("books"); + searchRequest.source().query(criteria); + + try { +SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); + return Arrays.stream(response.getHits().getHits()) + .map(hit -> new ObjectMapper().convertValue(hit.getSourceAsMap(), Book.class)) + .collect(Collectors.toList()); + } catch (IOException e) { + throw new RuntimeException("Error executing search", e); + } +``` \ No newline at end of file diff --git a/src/main/java/dev/vality/deanonimus/constant/OpenSearchConstants.java b/src/main/java/dev/vality/deanonimus/constant/OpenSearchConstants.java index d7ef759..52f4783 100644 --- a/src/main/java/dev/vality/deanonimus/constant/OpenSearchConstants.java +++ b/src/main/java/dev/vality/deanonimus/constant/OpenSearchConstants.java @@ -1,9 +1,7 @@ package dev.vality.deanonimus.constant; public class OpenSearchConstants { + public static final String PARTY_INDEX = "party"; - public static final String SHOP_INDEX = "shops"; - public static final String WALLET_INDEX = "wallets"; - public static final String CONTRACT_INDEX = "contracts"; - public static final String CONTRACTOR_INDEX = "contractors"; + } diff --git a/src/main/java/dev/vality/deanonimus/db/SearchDaoImpl.java b/src/main/java/dev/vality/deanonimus/db/SearchDaoImpl.java index 3d47843..72303bf 100644 --- a/src/main/java/dev/vality/deanonimus/db/SearchDaoImpl.java +++ b/src/main/java/dev/vality/deanonimus/db/SearchDaoImpl.java @@ -10,7 +10,7 @@ import org.opensearch.client.opensearch.core.SearchResponse; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import static dev.vality.deanonimus.constant.OpenSearchConstants.*; +import java.util.List; @Slf4j @Component @@ -18,24 +18,19 @@ import static dev.vality.deanonimus.constant.OpenSearchConstants.*; public class SearchDaoImpl implements SearchDao { @Value("${data.response.limit}") - Integer responseLimit; + private Integer responseLimit; private final OpenSearchClient openSearchClient; - @SneakyThrows @Override public SearchResponse searchParty(String text) { - - BoolQuery queryBuilder = new BoolQuery.Builder() - .should(searchPartyFields(text), - searchShopFields(text), - searchContractFields(text), - searchContractorFields(text), - searchWalletFields(text)) + var queryBuilder = new BoolQuery.Builder() + .should(searchBestFields(text, keywords()), + searchPhrasePrefix(text, fields())) .build(); - - return openSearchClient.search(s -> s + return openSearchClient.search( + s -> s .size(responseLimit) .query(new Query.Builder() .bool(queryBuilder) @@ -43,72 +38,46 @@ public class SearchDaoImpl implements SearchDao { Party.class); } - private Query searchContractorFields(String text) { - return new NestedQuery.Builder() - .path(CONTRACTOR_INDEX) - .query(new Query(new MultiMatchQuery.Builder() - .fields("contractors.id", - "contractors.registeredUserEmail", - "contractors.russianLegalEntityRegisteredName", - "contractors.russianLegalEntityInn", - "contractors.russianLegalEntityRussianBankAccount", - "contractors.internationalLegalEntityLegalName", - "contractors.internationalLegalEntityTradingName") - .query(text) - .type(TextQueryType.Phrase) - .build())) - .scoreMode(ChildScoreMode.Sum) - .build().query(); + private List keywords() { + return List.of( + "id.keyword", + "contractors.id.keyword", + "contractors.russianLegalEntityInn.keyword", + "contractors.russianLegalEntityRussianBankAccount.keyword", + "contracts.id.keyword", + "shops.id.keyword", + "wallets.id.keyword"); } - private Query searchContractFields(String text) { - return new NestedQuery.Builder() - .path(CONTRACT_INDEX) - .query(new Query(new MultiMatchQuery.Builder() - .fields("contracts.id", - "contracts.legalAgreementId", - "contracts.reportActSignerFullName") - .query(text) - .type(TextQueryType.Phrase) - .build())) - .scoreMode(ChildScoreMode.Sum) - .build().query(); + private List fields() { + return List.of( + "email", + "contractors.registeredUserEmail", + "contractors.russianLegalEntityRegisteredName", + "contractors.internationalLegalEntityLegalName", + "contractors.internationalLegalEntityTradingName", + "contracts.legalAgreementId", + "contracts.reportActSignerFullName", + "shops.locationUrl", + "shops.detailsName", + "wallets.name"); } - private Query searchPartyFields(String text) { + private Query searchBestFields(String text, List fields) { return new Query(new MultiMatchQuery.Builder() - .fields("id", - "email") + .fields(fields) .query(text) - .type(TextQueryType.Phrase) + .type(TextQueryType.BestFields) + .operator(Operator.Or) .build()); } - private Query searchShopFields(String text) { - return new NestedQuery.Builder() - .path(SHOP_INDEX) - .query(new Query(new MultiMatchQuery.Builder() - .fields("shops.id", - "shops.locationUrl", - "shops.detailsName") - .query(text) - .type(TextQueryType.Phrase) - .build())) - .scoreMode(ChildScoreMode.Sum) - .build().query(); + private Query searchPhrasePrefix(String text, List fields) { + return new Query(new MultiMatchQuery.Builder() + .fields(fields) + .query(text) + .type(TextQueryType.PhrasePrefix) + .operator(Operator.Or) + .build()); } - - private Query searchWalletFields(String text) { - return new NestedQuery.Builder() - .path(WALLET_INDEX) - .query(new Query(new MultiMatchQuery.Builder() - .fields("wallets.id", - "wallets.name") - .query(text) - .type(TextQueryType.Phrase) - .build())) - .scoreMode(ChildScoreMode.Sum) - .build().query(); - } - } diff --git a/src/test/java/dev/vality/deanonimus/ReadTest.java b/src/test/java/dev/vality/deanonimus/ReadTest.java index 5f336a7..e0c1a26 100644 --- a/src/test/java/dev/vality/deanonimus/ReadTest.java +++ b/src/test/java/dev/vality/deanonimus/ReadTest.java @@ -8,13 +8,19 @@ import dev.vality.deanonimus.handler.DeanonimusServiceHandler; import dev.vality.deanonimus.service.OpenSearchService; import lombok.SneakyThrows; import org.apache.thrift.TException; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.indices.DeleteIndexRequest; +import org.opensearch.client.opensearch.indices.ExistsRequest; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; +import java.io.IOException; import java.util.List; +import java.util.UUID; +import static dev.vality.deanonimus.constant.OpenSearchConstants.PARTY_INDEX; import static org.junit.jupiter.api.Assertions.*; public class ReadTest extends AbstractIntegrationTest { @@ -42,6 +48,13 @@ public class ReadTest extends AbstractIntegrationTest { private static final String INN = "1234234123"; private static final String ACCOUNT = "9999999999"; + @BeforeEach + void setUp() throws IOException { + var indices = client.indices(); + if (indices.exists(new ExistsRequest.Builder().index(PARTY_INDEX).build()).value()) { + indices.delete(new DeleteIndexRequest.Builder().index(PARTY_INDEX).build()); + } + } @Test void searchByPartyId() throws TException { @@ -54,6 +67,30 @@ public class ReadTest extends AbstractIntegrationTest { .anyMatch(partySearchHit -> partySearchHit.getParty().getEmail().contains(EMAIL))); } + @Test + void searchByPartyIdAdds() throws TException { + var id = UUID.randomUUID().toString(); + var mail = "asd zxc fgh"; + givenParty(id, mail); + refreshIndices(); + var searchHits = deanonimusServiceHandler.searchParty(mail); + assertFalse(searchHits.isEmpty()); + assertTrue(searchHits.stream() + .anyMatch(partySearchHit -> partySearchHit.getParty().getEmail().contains(mail))); + searchHits = deanonimusServiceHandler.searchParty(id); + assertFalse(searchHits.isEmpty()); + assertTrue(searchHits.stream() + .anyMatch(partySearchHit -> partySearchHit.getParty().getEmail().contains(mail))); + // match partial field + searchHits = deanonimusServiceHandler.searchParty("asd z"); + assertFalse(searchHits.isEmpty()); + assertTrue(searchHits.stream() + .anyMatch(partySearchHit -> partySearchHit.getParty().getEmail().contains(mail))); + // for keyword match only full field + searchHits = deanonimusServiceHandler.searchParty(id.substring(0, 8)); + assertTrue(searchHits.isEmpty()); + } + @Test void searchByPartyIdWithoutTokens() throws TException { givenParty(PARTY + "-test-kek", EMAIL + "1"); @@ -169,6 +206,34 @@ public class ReadTest extends AbstractIntegrationTest { .getRegisteredUser().getEmail().contains(EMAIL))); } + @Test + void searchByContractorEmailAdds() throws TException { + var id = UUID.randomUUID().toString(); + var mail = "asd zxc fgh"; + var party = givenParty(PARTY, null); + givenRegisteredUserContractor(party, id, mail); + refreshIndices(); + var searchHits = deanonimusServiceHandler.searchParty(mail); + assertFalse(searchHits.isEmpty()); + assertTrue(searchHits.stream() + .anyMatch(partySearchHit -> partySearchHit.getParty().getContractors().get(id).getContractor() + .getRegisteredUser().getEmail().contains(mail))); + searchHits = deanonimusServiceHandler.searchParty(id); + assertFalse(searchHits.isEmpty()); + assertTrue(searchHits.stream() + .anyMatch(partySearchHit -> partySearchHit.getParty().getContractors().get(id).getContractor() + .getRegisteredUser().getEmail().contains(mail))); + // match partial field + searchHits = deanonimusServiceHandler.searchParty("asd z"); + assertFalse(searchHits.isEmpty()); + assertTrue(searchHits.stream() + .anyMatch(partySearchHit -> partySearchHit.getParty().getContractors().get(id).getContractor() + .getRegisteredUser().getEmail().contains(mail))); + // for keyword match only full field + searchHits = deanonimusServiceHandler.searchParty(id.substring(0, 8)); + assertTrue(searchHits.isEmpty()); + } + @Test void searchByContractorRussianLegalEntityRegisteredNameWithOneWord() throws TException { Party party = givenParty(PARTY, null); @@ -357,6 +422,7 @@ public class ReadTest extends AbstractIntegrationTest { openSearchService.updateParty(party); } + @SneakyThrows private void refreshIndices() { client.indices().refresh(); diff --git a/src/test/resources/index_model.json b/src/test/resources/index_model.json new file mode 100644 index 0000000..c5dcdb0 --- /dev/null +++ b/src/test/resources/index_model.json @@ -0,0 +1,371 @@ +{ + "party": { + "mappings": { + "properties": { + "blocking": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "contractors": { + "properties": { + "id": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "internationalLegalEntityActualAddress": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "internationalLegalEntityLegalName": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "internationalLegalEntityRegisteredAddress": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "internationalLegalEntityRegisteredNumber": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "internationalLegalEntityTradingName": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "legalEntity": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "partyId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "registeredUserEmail": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityActualAddress": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityInn": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityPostAddress": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityRegisteredName": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityRegisteredNumber": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityRussianBankAccount": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityRussianBankBik": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityRussianBankName": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "russianLegalEntityRussianBankPostAccount": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "type": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "contracts": { + "properties": { + "contractorId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "id": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "legalAgreementId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "partyId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "paymentInstitutionId": { + "type": "long" + }, + "status": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "termsId": { + "type": "long" + } + } + }, + "email": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "id": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "shops": { + "properties": { + "accountCurrencyCode": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "accountGuarantee": { + "type": "long" + }, + "accountPayout": { + "type": "long" + }, + "accountSettlement": { + "type": "long" + }, + "blocking": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "categoryId": { + "type": "long" + }, + "contractId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "detailsDescription": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "detailsName": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "id": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "locationUrl": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "payoutToolId": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "suspension": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }, + "suspension": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + } + } +}