Created
August 7, 2020 12:47
-
-
Save yusufunlu/c3cf6116747c5309832b97d64c680ab5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package com.artiwise.newswise.supernova.hadron; | |
| import com.artiwise.newswise.supernova.model.DTO.ConditionDto; | |
| import com.artiwise.newswise.supernova.model.DTO.RuleObject; | |
| import com.artiwise.newswise.supernova.model.DTO.proxy.RuleProxy; | |
| import com.artiwise.newswise.supernova.model.enums.ConditionTypeEnum; | |
| import com.artiwise.newswise.supernova.model.enums.Operator; | |
| import com.artiwise.newswise.supernova.service.PayloadTrieCacheService; | |
| import com.artiwise.newswise.supernova.service.RuleService; | |
| import lombok.Getter; | |
| import lombok.Setter; | |
| import lombok.extern.slf4j.Slf4j; | |
| import org.ahocorasick.trie.PayloadTrie; | |
| import org.springframework.beans.factory.annotation.Autowired; | |
| import org.springframework.stereotype.Service; | |
| import java.util.*; | |
| import static com.artiwise.newswise.supernova.hadron.FilterPipelineAhoCorasick.*; | |
| import static com.artiwise.newswise.supernova.util.StringUtils.cleanExtraWhitespaceCharacter; | |
| import static com.artiwise.newswise.supernova.util.StringUtils.cleanNonWordCharacter; | |
| @Slf4j | |
| @Service | |
| @Getter | |
| @Setter | |
| public class TrieBuilder { | |
| private PayloadTrieCacheService payloadTrieCacheService; | |
| private RuleService ruleService; | |
| public TrieBuilder(PayloadTrieCacheService payloadTrieCacheService, RuleService ruleService) { | |
| this.payloadTrieCacheService = payloadTrieCacheService; | |
| this.ruleService = ruleService; | |
| } | |
| private static final String param = "TrieBuilder"; | |
| public PayloadTrie<Set<ConditionRulePair>> getFromControlledCache() { | |
| PayloadTrie<Set<ConditionRulePair>> fromCache = payloadTrieCacheService.getFromCache(param); | |
| if (fromCache == null) { | |
| log.info("Oups - Cache was empty. Going to populate it"); | |
| List<RuleProxy> ruleDtoList = ruleService.getRuleProxyObjects(); | |
| PayloadTrie<Set<ConditionRulePair>> payloadTrie = buildCommonRulePayloadTrie(ruleDtoList); | |
| PayloadTrie<Set<ConditionRulePair>> newValue = payloadTrieCacheService.populateCache(param, payloadTrie); | |
| log.info("Populated Cache with: {}", newValue); | |
| return newValue; | |
| } | |
| log.info("Returning from Cache: {}", fromCache); | |
| return fromCache; | |
| } | |
| public PayloadTrie<Set<ConditionRulePair>> buildCommonRulePayloadTrie(List<RuleProxy> ruleDtoList) { | |
| Map<String, Set<ConditionRulePair>> conditionPayloadMap = new HashMap<>(); | |
| ruleDtoList.stream().filter(ruleDto -> !ruleDto.getConditionList().isEmpty()) | |
| .forEach(ruleProxy -> { | |
| ruleProxy.getConditionList().stream().forEach(conditionDto -> { | |
| for(String keyword : conditionDto.getValues()) { | |
| String phrase = buildPhrase(keyword, conditionDto.getType()); | |
| ConditionRulePair pair = new ConditionRulePair(keyword, conditionDto.getType(), | |
| ruleProxy, !conditionDto.getOperator().equals(Operator.NOT)); | |
| if (conditionPayloadMap.containsKey(phrase)) { | |
| conditionPayloadMap.get(phrase).add(pair); | |
| } else { | |
| Set<ConditionRulePair> payload = new TreeSet<ConditionRulePair>(); | |
| payload.add(pair); | |
| conditionPayloadMap.put(phrase, payload); | |
| } | |
| } | |
| }); | |
| }); | |
| PayloadTrie.PayloadTrieBuilder<Set<ConditionRulePair>> includeBuilder = PayloadTrie.builder(); | |
| includeBuilder.onlyWholeWords(); | |
| if (!conditionPayloadMap.isEmpty()) { | |
| conditionPayloadMap.forEach(includeBuilder::addKeyword); | |
| } | |
| return includeBuilder.build(); | |
| } | |
| public PayloadTrie<Set<KeywordRulePair>> buildIncludeRulePayloadTrie(List<RuleProxy> ruleDtoList) { | |
| Map<String, Set<KeywordRulePair>> includeMap = new HashMap<>(); | |
| ruleDtoList.stream().filter(ruleDto -> !ruleDto.getConditionList().isEmpty()) | |
| .forEach(ruleDto -> { | |
| Optional<ConditionDto> keywordConditionOptional = ruleDto.getConditionList().stream() | |
| .filter(conditionDto -> conditionDto.getType().equals(ConditionTypeEnum.KEYWORD)) | |
| .findFirst(); | |
| if(keywordConditionOptional.isPresent()) { | |
| ConditionDto keywordCondition = keywordConditionOptional.get(); | |
| //include rule - !NOT ile exclude rule - NOT listeye eklenmeli | |
| //if(ruleDto.isInclude() ^ keywordCondition.getOperator().equals(Operator.NOT)) { | |
| if(ruleDto.isInclude()) { | |
| for(String keyword : keywordCondition.getValues()) { | |
| String phrase = buildPhrase(keyword); | |
| KeywordRulePair pair = new KeywordRulePair(keyword, ruleDto); | |
| if (includeMap.containsKey(phrase)) { | |
| includeMap.get(phrase).add(pair); | |
| } else { | |
| Set<KeywordRulePair> payload = new TreeSet<KeywordRulePair>(); | |
| payload.add(pair); | |
| includeMap.put(phrase, payload); | |
| } | |
| } | |
| } | |
| } else { | |
| log.debug("Rule id {} has no keyword condition", ruleDto.getId()); | |
| } | |
| }); | |
| PayloadTrie.PayloadTrieBuilder<Set<KeywordRulePair>> includeBuilder = PayloadTrie.builder(); | |
| includeBuilder.onlyWholeWords(); | |
| if (!includeMap.isEmpty()) { | |
| includeMap.forEach(includeBuilder::addKeyword); | |
| } | |
| return includeBuilder.build(); | |
| } | |
| public PayloadTrie<Set<KeywordRulePair>> buildExcludeRulePayloadTrie(List<RuleProxy> ruleDtoList) { | |
| Map<String, Set<KeywordRulePair>> excludeMap = new HashMap<>(); | |
| ruleDtoList.stream().filter(ruleDto -> !ruleDto.getConditionList().isEmpty()) | |
| .forEach(ruleDto -> { | |
| Optional<ConditionDto> keywordConditionOptional = ruleDto.getConditionList().stream() | |
| .filter(conditionDto -> conditionDto.getType().equals(ConditionTypeEnum.KEYWORD)) | |
| .findFirst(); | |
| if(keywordConditionOptional.isPresent()) { | |
| ConditionDto keywordCondition = keywordConditionOptional.get(); | |
| //include rule - NOT ile exclude rule - !NOT listeye eklenmeli | |
| if( !(ruleDto.isInclude())) { | |
| for(String keyword : keywordCondition.getValues()) { | |
| String phrase = buildPhrase(keyword); | |
| KeywordRulePair pair = new KeywordRulePair(keyword, ruleDto); | |
| if (excludeMap.containsKey(phrase)) { | |
| excludeMap.get(phrase).add(pair); | |
| } else { | |
| Set<KeywordRulePair> payload = new TreeSet<KeywordRulePair>(); | |
| payload.add(pair); | |
| excludeMap.put(phrase, payload); | |
| } | |
| } | |
| } | |
| } else { | |
| log.debug("Rule id {} has no keyword condition", ruleDto.getId()); | |
| } | |
| }); | |
| PayloadTrie.PayloadTrieBuilder<Set<KeywordRulePair>> excludeBuilder = PayloadTrie.builder(); | |
| excludeBuilder.onlyWholeWords(); | |
| if (!excludeMap.isEmpty()) { | |
| excludeMap.forEach(excludeBuilder::addKeyword); | |
| } | |
| return excludeBuilder.build(); | |
| } | |
| private String buildPhrase(String keyword) { | |
| if (keyword != null && !keyword.isEmpty()) | |
| keyword = keyword.trim(); | |
| if (keyword != null && !keyword.isEmpty()) { | |
| keyword = cleanNonWordCharacter(keyword); | |
| keyword = cleanExtraWhitespaceCharacter(keyword); | |
| keyword = keyword.trim(); | |
| keyword = keyword.toLowerCase(new Locale("tr", "TR")); | |
| } | |
| return keyword; | |
| } | |
| private String buildPhrase(String conditionValue, ConditionTypeEnum conditionTypeEnum) { | |
| if(conditionTypeEnum.equals(ConditionTypeEnum.KEYWORD)){ | |
| if (conditionValue != null && !conditionValue.isEmpty()) | |
| conditionValue = conditionValue.trim(); | |
| if (conditionValue != null && !conditionValue.isEmpty()) { | |
| conditionValue = cleanNonWordCharacter(conditionValue); | |
| conditionValue = cleanExtraWhitespaceCharacter(conditionValue); | |
| conditionValue = conditionValue.trim(); | |
| conditionValue = conditionValue.toLowerCase(new Locale("tr", "TR")); | |
| } | |
| } else if(conditionTypeEnum.equals(ConditionTypeEnum.CATEGORY)){ | |
| conditionValue = String.format("%s%s", CATEGORY_PREFIX, conditionValue); | |
| }else if(conditionTypeEnum.equals(ConditionTypeEnum.SITE)){ | |
| conditionValue = String.format("%s%s", SITE_PREFIX, conditionValue); | |
| }else if(conditionTypeEnum.equals(ConditionTypeEnum.SITETYPE)){ | |
| conditionValue = String.format("%s%s", SITETYPE_PREFIX, conditionValue); | |
| }else if(conditionTypeEnum.equals(ConditionTypeEnum.LANGUAGE)){ | |
| conditionValue = String.format("%s%s", LANGUAGE_PREFIX, conditionValue); | |
| }else if(conditionTypeEnum.equals(ConditionTypeEnum.CONTENTTYPE)){ | |
| conditionValue = String.format("%s%s", CONTENTTYPE_PREFIX, conditionValue); | |
| } | |
| return conditionValue; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment