Created
April 23, 2020 19:22
-
-
Save cfitz/12b46314efb51eee1f1d604977cb85de to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import synonyms from './synonyms' | |
// Keywords that should not be stemmed
const swedishKeywords = ['senior', 'island']

// Token filters shared by both custom analyzers, in application order.
// `swedish_keywords` must run before `swedish_stemmer` so the protected words
// are marked before stemming happens.
// The 'swedish_stop' filter is defined in the filter section but not enabled.
const baseSwedishFilters = [
  'lowercase',
  'swedish_keywords',
  'swedish_stemmer',
  'synonyms',
]

/**
 * Builds the mapping used for every Swedish full-text field: indexed with the
 * decompounding analyzer, searched with the plain custom analyzer, and phrase
 * (quoted) queries analyzed with the built-in `simple` analyzer.
 *
 * @param {object} [extra] - Extra mapping parameters (e.g. `copy_to`) merged
 *   on top of the base mapping.
 * @returns {object} A new mapping object on every call, so callers never
 *   share (and accidentally mutate) the same instance.
 */
const swedishTextField = (extra = {}) =>
  Object.assign(
    {
      type: 'text',
      analyzer: 'swedish_decompounder',
      search_analyzer: 'custom_swedish',
      search_quote_analyzer: 'simple',
    },
    extra
  )

/**
 * Elasticsearch index definition: index settings, custom analysis chain
 * (normalizers, token filters, char filters, analyzers) and the `_doc`
 * mappings (dynamic templates followed by explicitly mapped properties).
 */
const indexConfig = {
  settings: {
    index: {
      'mapping.total_fields.limit': 2200,
      number_of_shards: 1,
      number_of_replicas: 2,
    },
    analysis: {
      normalizer: {
        downcased_and_folded: {
          type: 'custom',
          filter: ['lowercase', 'asciifolding'],
        },
        downcased: {
          type: 'custom',
          filter: ['lowercase'],
        },
        alpha_numeric: {
          type: 'custom',
          char_filter: ['alpha_numeric_filter'],
          filter: ['lowercase'],
        },
      },
      filter: {
        synonyms: {
          type: 'synonym',
          synonyms,
        },
        // Defined but not referenced by any analyzer in this config.
        swedish_stop: {
          type: 'stop',
          stopwords: '_swedish_',
        },
        swedish_stemmer: {
          type: 'stemmer',
          language: 'swedish',
        },
        swedish_keywords: {
          type: 'keyword_marker',
          keywords: swedishKeywords,
        },
        '22_char_hyphenation_decompound': {
          type: 'hyphenation_decompounder',
          hyphenation_patterns_path: '/elasticsearch/config/analysis/se.xml',
          word_list_path: '/elasticsearch/config/analysis/swedish_words.txt',
          max_subword_size: 22,
          min_subword_size: 4,
        },
      },
      char_filter: {
        // Strips every character that is not a Swedish letter, digit or space.
        alpha_numeric_filter: {
          type: 'pattern_replace',
          pattern: '[^A-ZÅÄÖa-zåäö0-9 ]',
          replacement: '',
        },
      },
      analyzer: {
        custom_swedish: {
          tokenizer: 'standard',
          filter: [...baseSwedishFilters],
        },
        // NOTE(review): decompounding runs after stemming and synonyms here;
        // confirm that the word list is meant to match stemmed tokens.
        swedish_decompounder: {
          tokenizer: 'standard',
          filter: [...baseSwedishFilters, '22_char_hyphenation_decompound'],
        },
      },
    },
  },
  mappings: {
    _doc: {
      // Templates are evaluated in order; the first match wins.
      dynamic_templates: [
        {
          bag_of_dates: {
            match: '*_time',
            mapping: { type: 'date' },
          },
        },
        {
          bag_of_sort_dates: {
            match: '*_date',
            mapping: { type: 'date' },
          },
        },
        {
          files: {
            // `match` only sees the leaf field name, which never contains a
            // dot, so 'files.*' has to be a `path_match` to ever apply.
            path_match: 'files.*',
            // Limit to string values so objects below `files` are not forced
            // into a keyword mapping.
            match_mapping_type: 'string',
            mapping: { type: 'keyword' },
          },
        },
        {
          swedish: {
            path_match: '*_sv',
            mapping: swedishTextField(),
          },
        },
        {
          normalized: {
            path_match: '*_normalized',
            mapping: {
              type: 'keyword',
              normalizer: 'downcased_and_folded',
            },
          },
        },
        {
          downcased: {
            path_match: '*_downcased',
            mapping: {
              type: 'keyword',
              normalizer: 'downcased',
            },
          },
        },
        {
          facets: {
            match_pattern: 'regex',
            // Anchors wrap the whole alternation so only these exact names match.
            match: '^(language_level|keywords|subject_tree|categories)$',
            mapping: {
              type: 'keyword',
              copy_to: ['{name}_normalized', '{name}_downcased', '{name}_sv'],
            },
          },
        },
        {
          titles: {
            match_pattern: 'regex',
            // Any field name ending in "title", or exactly "usp".
            match: '^(.*title|usp)$',
            mapping: swedishTextField({
              copy_to: ['{name}_sort', 'all_titles'],
            }),
          },
        },
        {
          sorts: {
            path_match: '*_sort',
            mapping: { type: 'keyword', normalizer: 'alpha_numeric' },
          },
        },
        {
          languages: {
            match_pattern: 'regex',
            // NOTE(review): `subtitle_languages` is also mapped explicitly in
            // `properties`, so this template presumably never applies to it.
            match: '^(languages|languages_translations|subtitle_languages)$',
            mapping: {
              type: 'keyword',
              copy_to: [
                '{name}_normalized',
                '{name}_downcased',
                '{name}_sv',
                'all_languages_and_codes_normalized',
              ],
            },
          },
        },
      ],
      properties: {
        // NOTE(review): uses the built-in `swedish` analyzer, unlike the other
        // text fields which use the custom analyzers; confirm this is intended.
        description: { type: 'text', analyzer: 'swedish' },
        episode_number: { type: 'integer' },
        number_of_episodes: { type: 'integer' },
        duration: { type: 'integer' },
        product_type: { type: 'keyword' },
        format: { type: 'keyword' },
        typical_age_range: { type: 'keyword' },
        has_teacher_resource: { type: 'boolean' },
        published_at: { type: 'date' },
        subject_tree_raw: {
          type: 'keyword',
          normalizer: 'downcased_and_folded',
        },
        participants: {
          type: 'nested',
          properties: {
            firstname: {
              type: 'keyword',
              copy_to: ['firstname_sv', 'fullname'],
            },
            lastname: {
              type: 'keyword',
              copy_to: ['lastname_sv', 'fullname'],
            },
            fullname: { type: 'keyword', copy_to: 'fullname_sv' },
            role: { type: 'keyword' },
            profession: { type: 'keyword' },
          },
        },
        main_genre: { type: 'keyword' },
        emotional_tags: { type: 'keyword' },
        modified: { type: 'date' },
        subtitle_languages: { type: 'keyword' },
        // Stored in _source only; not parsed or indexed.
        image: { type: 'object', enabled: false },
        age_ranges: {
          type: 'nested',
          properties: {
            from: { type: 'integer' },
            to: { type: 'integer' },
          },
        },
        accessible_platforms: {
          type: 'nested',
          dynamic: true,
        },
        platforms: { type: 'keyword' },
        broadcasts: {
          type: 'nested',
          dynamic: true,
          properties: {
            platform: { type: 'keyword' },
          },
        },
        files: {
          type: 'nested',
          dynamic: true,
        },
        pod_info: {
          type: 'object',
          properties: {
            is_downloadable: { type: 'boolean' },
            file: { type: 'keyword' },
            filename: { type: 'keyword' },
            format: { type: 'keyword' },
          },
        },
        sab: { type: 'keyword' },
        // Stored in _source only; not parsed or indexed.
        streaming_info: {
          type: 'object',
          enabled: false,
        },
        seo_description: swedishTextField(),
        only_in_sweden: { type: 'boolean' },
        parental_lock: { type: 'boolean' },
        // NOTE(review): "laguage" looks like a typo for "language"; the name is
        // kept because indexed documents presumably use this exact key.
        is_sign_laguage_interpreted: { type: 'boolean' },
        is_audio_described: { type: 'boolean' },
        easy_to_read: { type: 'boolean' },
        super_series_id: { type: 'integer' },
        series_id: { type: 'integer' },
        program_ids: { type: 'integer' },
        product_views: { type: 'integer' },
      },
    },
  },
}

export default indexConfig
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment