OPS-407: refactor search query, use keywords (#49)

* OPS-407: refactor search query, use keywords

* OPS-407: refactor search query, use keywords

* OPS-407: refactor search query, use keywords
This commit is contained in:
Anatolii Karlov 2024-02-12 12:41:33 +02:00 committed by GitHub
parent 14fd1ef327
commit 4fdca4bf68
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 510 additions and 74 deletions

View File

@ -1,2 +1,34 @@
# deanonimus # deanonimus
Сервис для полнотекстового поиска party Сервис для полнотекстового поиска party
актуальные примеры с spring data elastic + opensearch
https://github.com/opensearch-project/spring-data-opensearch/blob/main/spring-data-opensearch-examples/spring-boot-gradle/src/main/java/org/opensearch/data/example/service/MarketplaceInitializer.java
https://github.com/M-Razavi/Spring-Data-OpenSearch-Example
полезные ссылки
https://opensearch.org/docs/latest/query-dsl/full-text/index/
https://opensearch.org/docs/latest/query-dsl/full-text/multi-match/
сниппеты
```java
getMappingResponse.get("party").mappings().properties().get("id").text().fields().get("keyword").keyword()
search.hits().hits().get(0).source()
var criteria = QueryBuilders.boolQuery()
.must(QueryBuilders.matchQuery("authorName", author))
.must(QueryBuilders.matchQuery("title", title));
SearchRequest searchRequest = new SearchRequest("books");
searchRequest.source().query(criteria);
try {
SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
return Arrays.stream(response.getHits().getHits())
.map(hit -> new ObjectMapper().convertValue(hit.getSourceAsMap(), Book.class))
.collect(Collectors.toList());
} catch (IOException e) {
throw new RuntimeException("Error executing search", e);
}
```

View File

@ -1,9 +1,7 @@
package dev.vality.deanonimus.constant; package dev.vality.deanonimus.constant;
public class OpenSearchConstants { public class OpenSearchConstants {
public static final String PARTY_INDEX = "party"; public static final String PARTY_INDEX = "party";
public static final String SHOP_INDEX = "shops";
public static final String WALLET_INDEX = "wallets";
public static final String CONTRACT_INDEX = "contracts";
public static final String CONTRACTOR_INDEX = "contractors";
} }

View File

@ -10,7 +10,7 @@ import org.opensearch.client.opensearch.core.SearchResponse;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import static dev.vality.deanonimus.constant.OpenSearchConstants.*; import java.util.List;
@Slf4j @Slf4j
@Component @Component
@ -18,24 +18,19 @@ import static dev.vality.deanonimus.constant.OpenSearchConstants.*;
public class SearchDaoImpl implements SearchDao { public class SearchDaoImpl implements SearchDao {
@Value("${data.response.limit}") @Value("${data.response.limit}")
Integer responseLimit; private Integer responseLimit;
private final OpenSearchClient openSearchClient; private final OpenSearchClient openSearchClient;
@SneakyThrows @SneakyThrows
@Override @Override
public SearchResponse<Party> searchParty(String text) { public SearchResponse<Party> searchParty(String text) {
var queryBuilder = new BoolQuery.Builder()
BoolQuery queryBuilder = new BoolQuery.Builder() .should(searchBestFields(text, keywords()),
.should(searchPartyFields(text), searchPhrasePrefix(text, fields()))
searchShopFields(text),
searchContractFields(text),
searchContractorFields(text),
searchWalletFields(text))
.build(); .build();
return openSearchClient.search(
return openSearchClient.search(s -> s s -> s
.size(responseLimit) .size(responseLimit)
.query(new Query.Builder() .query(new Query.Builder()
.bool(queryBuilder) .bool(queryBuilder)
@ -43,72 +38,46 @@ public class SearchDaoImpl implements SearchDao {
Party.class); Party.class);
} }
private Query searchContractorFields(String text) { private List<String> keywords() {
return new NestedQuery.Builder() return List.of(
.path(CONTRACTOR_INDEX) "id.keyword",
.query(new Query(new MultiMatchQuery.Builder() "contractors.id.keyword",
.fields("contractors.id", "contractors.russianLegalEntityInn.keyword",
"contractors.russianLegalEntityRussianBankAccount.keyword",
"contracts.id.keyword",
"shops.id.keyword",
"wallets.id.keyword");
}
private List<String> fields() {
return List.of(
"email",
"contractors.registeredUserEmail", "contractors.registeredUserEmail",
"contractors.russianLegalEntityRegisteredName", "contractors.russianLegalEntityRegisteredName",
"contractors.russianLegalEntityInn",
"contractors.russianLegalEntityRussianBankAccount",
"contractors.internationalLegalEntityLegalName", "contractors.internationalLegalEntityLegalName",
"contractors.internationalLegalEntityTradingName") "contractors.internationalLegalEntityTradingName",
.query(text)
.type(TextQueryType.Phrase)
.build()))
.scoreMode(ChildScoreMode.Sum)
.build().query();
}
private Query searchContractFields(String text) {
return new NestedQuery.Builder()
.path(CONTRACT_INDEX)
.query(new Query(new MultiMatchQuery.Builder()
.fields("contracts.id",
"contracts.legalAgreementId", "contracts.legalAgreementId",
"contracts.reportActSignerFullName") "contracts.reportActSignerFullName",
.query(text) "shops.locationUrl",
.type(TextQueryType.Phrase) "shops.detailsName",
.build())) "wallets.name");
.scoreMode(ChildScoreMode.Sum)
.build().query();
} }
private Query searchPartyFields(String text) { private Query searchBestFields(String text, List<String> fields) {
return new Query(new MultiMatchQuery.Builder() return new Query(new MultiMatchQuery.Builder()
.fields("id", .fields(fields)
"email")
.query(text) .query(text)
.type(TextQueryType.Phrase) .type(TextQueryType.BestFields)
.operator(Operator.Or)
.build()); .build());
} }
private Query searchShopFields(String text) { private Query searchPhrasePrefix(String text, List<String> fields) {
return new NestedQuery.Builder() return new Query(new MultiMatchQuery.Builder()
.path(SHOP_INDEX) .fields(fields)
.query(new Query(new MultiMatchQuery.Builder()
.fields("shops.id",
"shops.locationUrl",
"shops.detailsName")
.query(text) .query(text)
.type(TextQueryType.Phrase) .type(TextQueryType.PhrasePrefix)
.build())) .operator(Operator.Or)
.scoreMode(ChildScoreMode.Sum) .build());
.build().query();
} }
private Query searchWalletFields(String text) {
return new NestedQuery.Builder()
.path(WALLET_INDEX)
.query(new Query(new MultiMatchQuery.Builder()
.fields("wallets.id",
"wallets.name")
.query(text)
.type(TextQueryType.Phrase)
.build()))
.scoreMode(ChildScoreMode.Sum)
.build().query();
}
} }

View File

@ -8,13 +8,19 @@ import dev.vality.deanonimus.handler.DeanonimusServiceHandler;
import dev.vality.deanonimus.service.OpenSearchService; import dev.vality.deanonimus.service.OpenSearchService;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import org.apache.thrift.TException; import org.apache.thrift.TException;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.opensearch.client.opensearch.OpenSearchClient; import org.opensearch.client.opensearch.OpenSearchClient;
import org.opensearch.client.opensearch.indices.DeleteIndexRequest;
import org.opensearch.client.opensearch.indices.ExistsRequest;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.UUID;
import static dev.vality.deanonimus.constant.OpenSearchConstants.PARTY_INDEX;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
public class ReadTest extends AbstractIntegrationTest { public class ReadTest extends AbstractIntegrationTest {
@ -42,6 +48,13 @@ public class ReadTest extends AbstractIntegrationTest {
private static final String INN = "1234234123"; private static final String INN = "1234234123";
private static final String ACCOUNT = "9999999999"; private static final String ACCOUNT = "9999999999";
/**
 * Runs before every test and deletes the {@code PARTY_INDEX} index when it already exists.
 * NOTE(review): presumably this keeps documents indexed by one test out of the next — confirm.
 *
 * @throws IOException propagated from the index exists/delete calls
 */
@BeforeEach
void setUp() throws IOException {
    var indices = client.indices();
    var existsRequest = new ExistsRequest.Builder().index(PARTY_INDEX).build();
    boolean partyIndexPresent = indices.exists(existsRequest).value();
    if (!partyIndexPresent) {
        // Nothing to clean up.
        return;
    }
    var deleteRequest = new DeleteIndexRequest.Builder().index(PARTY_INDEX).build();
    indices.delete(deleteRequest);
}
@Test @Test
void searchByPartyId() throws TException { void searchByPartyId() throws TException {
@ -54,6 +67,30 @@ public class ReadTest extends AbstractIntegrationTest {
.anyMatch(partySearchHit -> partySearchHit.getParty().getEmail().contains(EMAIL))); .anyMatch(partySearchHit -> partySearchHit.getParty().getEmail().contains(EMAIL)));
} }
/**
 * Indexes one party with a random UUID id and a multi-word email, then checks that it is
 * returned when searching by the full email, by the full id and by a leading fragment of the
 * email, while searching by only the first eight characters of the id returns no hits.
 */
@Test
void searchByPartyIdAdds() throws TException {
    var partyId = UUID.randomUUID().toString();
    var mail = "asd zxc fgh";
    givenParty(partyId, mail);
    refreshIndices();

    // Full email value.
    var hitsByEmail = deanonimusServiceHandler.searchParty(mail);
    assertFalse(hitsByEmail.isEmpty());
    assertTrue(hitsByEmail.stream()
            .map(hit -> hit.getParty().getEmail())
            .anyMatch(email -> email.contains(mail)));

    // Full party id.
    var hitsById = deanonimusServiceHandler.searchParty(partyId);
    assertFalse(hitsById.isEmpty());
    assertTrue(hitsById.stream()
            .map(hit -> hit.getParty().getEmail())
            .anyMatch(email -> email.contains(mail)));

    // A leading fragment of the email is expected to match as well.
    var hitsByEmailPrefix = deanonimusServiceHandler.searchParty("asd z");
    assertFalse(hitsByEmailPrefix.isEmpty());
    assertTrue(hitsByEmailPrefix.stream()
            .map(hit -> hit.getParty().getEmail())
            .anyMatch(email -> email.contains(mail)));

    // The id is expected to match only as a whole value, so its first segment finds nothing.
    var hitsByIdPrefix = deanonimusServiceHandler.searchParty(partyId.substring(0, 8));
    assertTrue(hitsByIdPrefix.isEmpty());
}
@Test @Test
void searchByPartyIdWithoutTokens() throws TException { void searchByPartyIdWithoutTokens() throws TException {
givenParty(PARTY + "-test-kek", EMAIL + "1"); givenParty(PARTY + "-test-kek", EMAIL + "1");
@ -169,6 +206,34 @@ public class ReadTest extends AbstractIntegrationTest {
.getRegisteredUser().getEmail().contains(EMAIL))); .getRegisteredUser().getEmail().contains(EMAIL)));
} }
/**
 * Attaches a registered-user contractor (random UUID id, multi-word email) to a party and
 * checks that the party is returned when searching by the full contractor email, by the full
 * contractor id and by a leading fragment of the email, while searching by only the first
 * eight characters of the contractor id returns no hits.
 */
@Test
void searchByContractorEmailAdds() throws TException {
    var contractorId = UUID.randomUUID().toString();
    var mail = "asd zxc fgh";
    var party = givenParty(PARTY, null);
    givenRegisteredUserContractor(party, contractorId, mail);
    refreshIndices();

    // Full contractor email.
    var hitsByEmail = deanonimusServiceHandler.searchParty(mail);
    assertFalse(hitsByEmail.isEmpty());
    assertTrue(hitsByEmail.stream()
            .map(hit -> hit.getParty().getContractors().get(contractorId).getContractor()
                    .getRegisteredUser().getEmail())
            .anyMatch(email -> email.contains(mail)));

    // Full contractor id.
    var hitsById = deanonimusServiceHandler.searchParty(contractorId);
    assertFalse(hitsById.isEmpty());
    assertTrue(hitsById.stream()
            .map(hit -> hit.getParty().getContractors().get(contractorId).getContractor()
                    .getRegisteredUser().getEmail())
            .anyMatch(email -> email.contains(mail)));

    // A leading fragment of the email is expected to match as well.
    var hitsByEmailPrefix = deanonimusServiceHandler.searchParty("asd z");
    assertFalse(hitsByEmailPrefix.isEmpty());
    assertTrue(hitsByEmailPrefix.stream()
            .map(hit -> hit.getParty().getContractors().get(contractorId).getContractor()
                    .getRegisteredUser().getEmail())
            .anyMatch(email -> email.contains(mail)));

    // The id is expected to match only as a whole value, so its first segment finds nothing.
    var hitsByIdPrefix = deanonimusServiceHandler.searchParty(contractorId.substring(0, 8));
    assertTrue(hitsByIdPrefix.isEmpty());
}
@Test @Test
void searchByContractorRussianLegalEntityRegisteredNameWithOneWord() throws TException { void searchByContractorRussianLegalEntityRegisteredNameWithOneWord() throws TException {
Party party = givenParty(PARTY, null); Party party = givenParty(PARTY, null);
@ -357,6 +422,7 @@ public class ReadTest extends AbstractIntegrationTest {
openSearchService.updateParty(party); openSearchService.updateParty(party);
} }
@SneakyThrows @SneakyThrows
private void refreshIndices() { private void refreshIndices() {
client.indices().refresh(); client.indices().refresh();

View File

@ -0,0 +1,371 @@
{
"party": {
"mappings": {
"properties": {
"blocking": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"contractors": {
"properties": {
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"internationalLegalEntityActualAddress": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"internationalLegalEntityLegalName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"internationalLegalEntityRegisteredAddress": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"internationalLegalEntityRegisteredNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"internationalLegalEntityTradingName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"legalEntity": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"partyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"registeredUserEmail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityActualAddress": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityInn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityPostAddress": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityRegisteredName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityRegisteredNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityRussianBankAccount": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityRussianBankBik": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityRussianBankName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"russianLegalEntityRussianBankPostAccount": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"contracts": {
"properties": {
"contractorId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"legalAgreementId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"partyId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"paymentInstitutionId": {
"type": "long"
},
"status": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"termsId": {
"type": "long"
}
}
},
"email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"shops": {
"properties": {
"accountCurrencyCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"accountGuarantee": {
"type": "long"
},
"accountPayout": {
"type": "long"
},
"accountSettlement": {
"type": "long"
},
"blocking": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"categoryId": {
"type": "long"
},
"contractId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"detailsDescription": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"detailsName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locationUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"payoutToolId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"suspension": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"suspension": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}