Skip to content

Instantly share code, notes, and snippets.

@yusufunlu
Created August 7, 2020 12:47
Show Gist options
  • Select an option

  • Save yusufunlu/c3cf6116747c5309832b97d64c680ab5 to your computer and use it in GitHub Desktop.

Select an option

Save yusufunlu/c3cf6116747c5309832b97d64c680ab5 to your computer and use it in GitHub Desktop.
package com.artiwise.newswise.supernova.hadron;
import com.artiwise.newswise.supernova.model.DTO.ConditionDto;
import com.artiwise.newswise.supernova.model.DTO.RuleObject;
import com.artiwise.newswise.supernova.model.DTO.proxy.RuleProxy;
import com.artiwise.newswise.supernova.model.enums.ConditionTypeEnum;
import com.artiwise.newswise.supernova.model.enums.Operator;
import com.artiwise.newswise.supernova.service.PayloadTrieCacheService;
import com.artiwise.newswise.supernova.service.RuleService;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.ahocorasick.trie.PayloadTrie;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.*;
import static com.artiwise.newswise.supernova.hadron.FilterPipelineAhoCorasick.*;
import static com.artiwise.newswise.supernova.util.StringUtils.cleanExtraWhitespaceCharacter;
import static com.artiwise.newswise.supernova.util.StringUtils.cleanNonWordCharacter;
@Slf4j
@Service
@Getter
@Setter
public class TrieBuilder {
private PayloadTrieCacheService payloadTrieCacheService;
private RuleService ruleService;
public TrieBuilder(PayloadTrieCacheService payloadTrieCacheService, RuleService ruleService) {
this.payloadTrieCacheService = payloadTrieCacheService;
this.ruleService = ruleService;
}
private static final String param = "TrieBuilder";
public PayloadTrie<Set<ConditionRulePair>> getFromControlledCache() {
PayloadTrie<Set<ConditionRulePair>> fromCache = payloadTrieCacheService.getFromCache(param);
if (fromCache == null) {
log.info("Oups - Cache was empty. Going to populate it");
List<RuleProxy> ruleDtoList = ruleService.getRuleProxyObjects();
PayloadTrie<Set<ConditionRulePair>> payloadTrie = buildCommonRulePayloadTrie(ruleDtoList);
PayloadTrie<Set<ConditionRulePair>> newValue = payloadTrieCacheService.populateCache(param, payloadTrie);
log.info("Populated Cache with: {}", newValue);
return newValue;
}
log.info("Returning from Cache: {}", fromCache);
return fromCache;
}
public PayloadTrie<Set<ConditionRulePair>> buildCommonRulePayloadTrie(List<RuleProxy> ruleDtoList) {
Map<String, Set<ConditionRulePair>> conditionPayloadMap = new HashMap<>();
ruleDtoList.stream().filter(ruleDto -> !ruleDto.getConditionList().isEmpty())
.forEach(ruleProxy -> {
ruleProxy.getConditionList().stream().forEach(conditionDto -> {
for(String keyword : conditionDto.getValues()) {
String phrase = buildPhrase(keyword, conditionDto.getType());
ConditionRulePair pair = new ConditionRulePair(keyword, conditionDto.getType(),
ruleProxy, !conditionDto.getOperator().equals(Operator.NOT));
if (conditionPayloadMap.containsKey(phrase)) {
conditionPayloadMap.get(phrase).add(pair);
} else {
Set<ConditionRulePair> payload = new TreeSet<ConditionRulePair>();
payload.add(pair);
conditionPayloadMap.put(phrase, payload);
}
}
});
});
PayloadTrie.PayloadTrieBuilder<Set<ConditionRulePair>> includeBuilder = PayloadTrie.builder();
includeBuilder.onlyWholeWords();
if (!conditionPayloadMap.isEmpty()) {
conditionPayloadMap.forEach(includeBuilder::addKeyword);
}
return includeBuilder.build();
}
public PayloadTrie<Set<KeywordRulePair>> buildIncludeRulePayloadTrie(List<RuleProxy> ruleDtoList) {
Map<String, Set<KeywordRulePair>> includeMap = new HashMap<>();
ruleDtoList.stream().filter(ruleDto -> !ruleDto.getConditionList().isEmpty())
.forEach(ruleDto -> {
Optional<ConditionDto> keywordConditionOptional = ruleDto.getConditionList().stream()
.filter(conditionDto -> conditionDto.getType().equals(ConditionTypeEnum.KEYWORD))
.findFirst();
if(keywordConditionOptional.isPresent()) {
ConditionDto keywordCondition = keywordConditionOptional.get();
//include rule - !NOT ile exclude rule - NOT listeye eklenmeli
//if(ruleDto.isInclude() ^ keywordCondition.getOperator().equals(Operator.NOT)) {
if(ruleDto.isInclude()) {
for(String keyword : keywordCondition.getValues()) {
String phrase = buildPhrase(keyword);
KeywordRulePair pair = new KeywordRulePair(keyword, ruleDto);
if (includeMap.containsKey(phrase)) {
includeMap.get(phrase).add(pair);
} else {
Set<KeywordRulePair> payload = new TreeSet<KeywordRulePair>();
payload.add(pair);
includeMap.put(phrase, payload);
}
}
}
} else {
log.debug("Rule id {} has no keyword condition", ruleDto.getId());
}
});
PayloadTrie.PayloadTrieBuilder<Set<KeywordRulePair>> includeBuilder = PayloadTrie.builder();
includeBuilder.onlyWholeWords();
if (!includeMap.isEmpty()) {
includeMap.forEach(includeBuilder::addKeyword);
}
return includeBuilder.build();
}
public PayloadTrie<Set<KeywordRulePair>> buildExcludeRulePayloadTrie(List<RuleProxy> ruleDtoList) {
Map<String, Set<KeywordRulePair>> excludeMap = new HashMap<>();
ruleDtoList.stream().filter(ruleDto -> !ruleDto.getConditionList().isEmpty())
.forEach(ruleDto -> {
Optional<ConditionDto> keywordConditionOptional = ruleDto.getConditionList().stream()
.filter(conditionDto -> conditionDto.getType().equals(ConditionTypeEnum.KEYWORD))
.findFirst();
if(keywordConditionOptional.isPresent()) {
ConditionDto keywordCondition = keywordConditionOptional.get();
//include rule - NOT ile exclude rule - !NOT listeye eklenmeli
if( !(ruleDto.isInclude())) {
for(String keyword : keywordCondition.getValues()) {
String phrase = buildPhrase(keyword);
KeywordRulePair pair = new KeywordRulePair(keyword, ruleDto);
if (excludeMap.containsKey(phrase)) {
excludeMap.get(phrase).add(pair);
} else {
Set<KeywordRulePair> payload = new TreeSet<KeywordRulePair>();
payload.add(pair);
excludeMap.put(phrase, payload);
}
}
}
} else {
log.debug("Rule id {} has no keyword condition", ruleDto.getId());
}
});
PayloadTrie.PayloadTrieBuilder<Set<KeywordRulePair>> excludeBuilder = PayloadTrie.builder();
excludeBuilder.onlyWholeWords();
if (!excludeMap.isEmpty()) {
excludeMap.forEach(excludeBuilder::addKeyword);
}
return excludeBuilder.build();
}
private String buildPhrase(String keyword) {
if (keyword != null && !keyword.isEmpty())
keyword = keyword.trim();
if (keyword != null && !keyword.isEmpty()) {
keyword = cleanNonWordCharacter(keyword);
keyword = cleanExtraWhitespaceCharacter(keyword);
keyword = keyword.trim();
keyword = keyword.toLowerCase(new Locale("tr", "TR"));
}
return keyword;
}
private String buildPhrase(String conditionValue, ConditionTypeEnum conditionTypeEnum) {
if(conditionTypeEnum.equals(ConditionTypeEnum.KEYWORD)){
if (conditionValue != null && !conditionValue.isEmpty())
conditionValue = conditionValue.trim();
if (conditionValue != null && !conditionValue.isEmpty()) {
conditionValue = cleanNonWordCharacter(conditionValue);
conditionValue = cleanExtraWhitespaceCharacter(conditionValue);
conditionValue = conditionValue.trim();
conditionValue = conditionValue.toLowerCase(new Locale("tr", "TR"));
}
} else if(conditionTypeEnum.equals(ConditionTypeEnum.CATEGORY)){
conditionValue = String.format("%s%s", CATEGORY_PREFIX, conditionValue);
}else if(conditionTypeEnum.equals(ConditionTypeEnum.SITE)){
conditionValue = String.format("%s%s", SITE_PREFIX, conditionValue);
}else if(conditionTypeEnum.equals(ConditionTypeEnum.SITETYPE)){
conditionValue = String.format("%s%s", SITETYPE_PREFIX, conditionValue);
}else if(conditionTypeEnum.equals(ConditionTypeEnum.LANGUAGE)){
conditionValue = String.format("%s%s", LANGUAGE_PREFIX, conditionValue);
}else if(conditionTypeEnum.equals(ConditionTypeEnum.CONTENTTYPE)){
conditionValue = String.format("%s%s", CONTENTTYPE_PREFIX, conditionValue);
}
return conditionValue;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment