å šææ€çŽ¢ã䜿çšãããšãããã¹ãã³ã³ãã³ãã§ããã¥ã¡ã³ããæ€çŽ¢ã§ããŸãã ãã®ãããªå¿ èŠæ§ã¯ãã·ã¹ãã ã«å€ãã®ããã¹ããšã³ãã£ãã£ãå«ãŸããŠããããŠãŒã¶ãŒãæ€çŽ¢äžã«ãã®ããŒã¿ãèæ ®ããå¿ èŠãããå Žåã«çºçããããšããããŸãã ã¯ãŒã¯ãããŒãœãªã¥ãŒã·ã§ã³ãéçºãããšãã«ãåæ§ã®ç¶æ³ã«çŽé¢ããŸãã*ã ã·ã¹ãã ããŒã¿ã¯MS SQL ServerãŸãã¯PostgreSQLã«ä¿åãããæè»ãªå±æ§æ€çŽ¢ã«ãããããŸããŸãªã¡ã¿æ å ±ã䜿çšããŠããã¥ã¡ã³ããæ€çŽ¢ã§ããŸãã ããããæéãçµã€ã«ã€ããŠãããã¯ååã§ã¯ãããŸããã§ããã ããã¹ãããããã£ãšæ·»ä»ãã¡ã€ã«ã§ããã¥ã¡ã³ããæ€çŽ¢ããæ¹æ³ãåŠç¿ãããšãã課é¡ã«çŽé¢ããŸããã
åé¡ã¯ãå šææ€çŽ¢ãææã§ã®ã¿SQLãµãŒããŒã§ãµããŒããããå¿ èŠãªæè»æ§ãæäŸãããªãããšã§ãã ãã®ç¬éãæ€çŽ¢ãšã³ãžã³ãç»å ŽããŸãã ããšãã°ãSphinxãSolrãElasticsearchãªã©ãããŸããŸãªå šææ€çŽ¢ã·ã¹ãã ããããŸãã ããããç§ãã¡ã®éžæã¯ãããã®æåŸã«èœã¡çããŸããã äžè¬ã«ãããã¥ã¡ã³ãã®å€§èŠæš¡ãªåçããŒã¿ããŒã¹ãElasticsearchãããã³å šææ€çŽ¢çšã®Webã€ã³ã¿ãŒãã§ãŒã¹ãæã€é¡§å®¢ã®èŠæããããŸãã ãªãŒãã³ã³ããªãŒãããã³ãããã¡ã»ãããããã³ãªã³ã©ã€ã³ã¹ãã¢ã®æ©èœã«è¿ããã®ä»ã®æ©èœã ãã®åé¡ã®è§£æ±ºæ¹æ³ã«é¢ããèšäºã
* Docsvisionプラットフォームの優先文書管理システム
アーキテクチャ
ElasticsearchããŒã¿ããŒã¹ã§ã¯ãããŒãã«ã¯ã€ã³ããã¯ã¹ãšåŒã°ããããã¥ã¡ã³ããããŒãããããã»ã¹ã¯ã€ã³ããã¯ã¹ä»ããšåŒã°ããŸãã äž»èšæ¶åããã®ããŒã¿ã«ã€ã³ããã¯ã¹ãä»ããããã«ãElasticsearchã§ç¹å¥ãªãµãŒãã¹ãäœæãããŸããã ããã¯WindowsãµãŒãã¹ã§ãããããã«å ããŠç®¡çè ãŠãŒãã£ãªãã£ããããŸãã ãŠãŒãã£ãªãã£ã¯å¿ èŠãªèšå®ãèšå®ããã€ã³ããã¯ã¹ãäœæããããŒã¿ããŒã¹ãžã®ããã¥ã¡ã³ãã®ããŒããéå§ããŸãã
ãã ããããŒã¿ã®ã€ã³ããã¯ã¹äœæã®æ®µéã§åé¡ãçºçããŸããã ã·ã¹ãã ã¯åçã§ãããããã¥ã¡ã³ãã«ã¯æ¯åæ°åã®å€æŽãçºçããŸãã ã€ã³ããã¯ã¹ãµãŒãã¹ã¯ãElasticsearchããŒã¿ãçŸåšã®ç¶æ³ã«ã§ããã ãè¿ãç¶æããå¿ èŠããããŸãã ãã®ãããSQLããŒã¿ããŒã¹ã«æ°ãããšã³ãã£ãã£ïŒã€ã³ããã¯ã¹äœæçšã®ããã¥ã¡ã³ãã®ãã¥ãŒïŒã衚瀺ãããŸãã Nåããšã«ãç¹å¥ãªãžã§ããååã®å®è¡ä»¥éã«å€æŽããããã¹ãŠã®ããã¥ã¡ã³ããæ€çŽ¢ãããããã®èå¥åããã¥ãŒã«è¿œå ããŸãã ãã®çµæããµãŒãã¹ã¯ã€ã³ããã¯ã¹ã§ãããå¿ èŠãšããããã¥ã¡ã³ãã®ã¿ãæŽæ°ããŸãã
技術スタック
検索エンジン：Elasticsearch 5.5
プラグイン：Analysis Morphology および Ingest Attachment
サービス：C#で書かれています。エンジンと対話するためのライブラリ：NESTおよびElasticsearch.Net。
フロントエンド：Angular 4
ダウンロード設定
管çè ãŠãŒãã£ãªãã£-ã€ã³ããã¯ã¹ãäœæããããã¥ã¡ã³ããšãã£ãŒã«ãã®çš®é¡ãéžæããããã®ãµãŒãã¹ã®äžéšã ãã®åŸãããŒã¿ã¹ããŒã ãElasticsearchã«èªã¿èŸŒãŸããŸãïŒESãšã³ã·ã¹ãã ã§ã¯ããããã³ã°ãšåŒã°ããŸãïŒã ããã¯ãç°ãªããã£ãŒã«ããèšå®ã«èšå®ããããã«å¿ èŠã§ãããããã¥ã¡ã³ãã®ã€ã³ããã¯ã¹äœææã«èæ ®ãããŸãã ããã«ããŠãŒãã£ãªãã£ã¯éžæçµæãããŒã¿ããŒã¹ã«ä¿åããŸãã
マッピングの生成
ãããã³ã°ã¯ãNESTã©ã€ãã©ãªã䜿çšããŠåçã«çæãããŸãã ã·ã¹ãã ã®åããŒã¿ã¿ã€ãã¯ãElasticsearchããŒã¿ã¿ã€ãã«é¢é£ä»ããããŠããŸãã ããŒã¿ããŒã¹ã¯ãããã¥ã¡ã³ãã®éå±€æ§é ããµããŒãããŠããŸãã ããã¯ã ãªããžã§ã¯ããšãã¹ããããããŒã¿åïŒé åã®å ŽåïŒã«å¯Ÿå¿ããŸã ã
// Struct sections become a plain object field; any other section type
// becomes a nested field (the article uses nested for array sections).
var isStructSection = section.SectionType == SectionType.Struct;
var sectionProperty = isStructSection
    ? new ObjectProperty { Name = section.Name }
    : new NestedProperty { Name = section.Name };
テキストフィールドの分析
Elasticsearchã¯ãåªããå šææ€çŽ¢æ©èœãæäŸããŸãã åèªåœ¢åŒãèæ ®ããã¹ãããã¯ãŒããã¹ãããããèšèªåœ¢æ ã䜿çšã§ããŸãã ãããè¡ãã«ã¯ããããã³ã°ãäœæãã段éã§ããããå¿ èŠãšããããã¹ããã£ãŒã«ãã«æ£ããã¢ãã©ã€ã¶ãŒãæå®ããå¿ èŠããããŸãã ãããã®èšå®ã¯ã管çè ãŠãŒãã£ãªãã£ã§ãæå®ãããŸãã
ã¢ãã©ã€ã¶ãŒã«ã¯ãåã ã®æåã®å€æãæåã®ããŒã¯ã³ãžã®åå²ãããã³ãããã®ããŒã¯ã³ã®åŠçã®3ã€ã®æ®µéãå«ãŸããŸãã ãã®å Žåãæåã®ãã£ã«ã¿ãªã³ã°ã¯å¿ èŠãããŸããã ããŒã¯ãã€ã¶ãŒãšããŠãæšæºçãªãã®ã䜿çšããŸããããã¯ã»ãšãã©ã®å Žåããã®ãŸãŸäœ¿çšã§ããŸãã ãã·ã¢èªã¢ãã©ã€ã¶ãŒã®äž»èŠãªãªã³ã¯ã¯ãå ¬åŒã®åæ圢æ åŠãã©ã°ã€ã³ã§ãã åèªåœ¢åŒã«åºã¥ããŠæ€çŽ¢ã§ããããŒã¯ã³ãã£ã«ã¿ãŒãæäŸããŸãã ãŸãããã¹ãŠã®åèªãå°æåã«ããç¬èªã®ã¹ãããã¯ãŒãã䜿çšããŸãã
// Builds the analysis settings for Russian text fields.

// Token filter that removes the custom stop words (StopWordsArray is defined outside this snippet).
var stopFilter = new StopTokenFilter { StopWords = new StopWords(StopWordsArray) };
// Register the filter under the name the analyzer's filter chain refers to.
var filters = new TokenFilters { { "my_stopwords", stopFilter } };
// Custom analyzer: standard tokenizer, then lowercase -> russian_morphology -> my_stopwords.
// "russian_morphology" is not defined in this snippet; the article attributes it to the morphology plugin.
var rusAnalyzer = new CustomAnalyzer { Tokenizer = "standard", Filter = new[] { "lowercase", "russian_morphology", "my_stopwords" } };
// NOTE(review): "analazyers" and "analyzis" look like misspellings of "analyzers" / "analysis";
// left unchanged here because code outside this snippet may reference these names.
var analazyers = new Analyzers { { "rus_analyzer", rusAnalyzer } };
// Final analysis section combining the analyzer and the token filter definitions.
var analyzis = new Analysis { Analyzers = analazyers, TokenFilters = filters };
ファイルの設定
ElasticsearchããŒãžã§ã³5.0ã§ã¯ãæ°ãããšã³ãã£ãã£ã§ããIngestããŒããå°å ¥ãããŠããŸãã ãã®ãããªããŒãã¯ãããã¥ã¡ã³ããã€ã³ããã¯ã¹åããåã«ããã¥ã¡ã³ããåŠçããããã«äœ¿çšãããŸãã ãããè¡ãã«ã¯ã ãã€ãã©ã€ã³ãäœæããããã«ããã»ããµãè¿œå ããŸãã ä»»æã®ããŒããåã蟌ã¿ãšããŠäœ¿çšã§ããŸãã ãŸãã¯ããã©ã€ããªåŠççšã«å¥ã®ããŒããéžæã§ããŸãã
ã·ã¹ãã ã®å€ãã®ããã¥ã¡ã³ãã«ã¯ããã¹ããã¡ã€ã«ãå«ãŸããŠããŸãã å šææ€çŽ¢ã¯ãã³ã³ãã³ãã«å¯ŸããŠæ©èœããå¿ èŠããããŸãã ãããå®è£ ããããã«ãæè¿å°å ¥ããããã€ãã©ã€ã³æè¡ã䜿çšããIngest Attachmentãã©ã°ã€ã³ã䜿çšããŸããã ãã©ã°ã€ã³ãä»ããŠå©çšå¯èœãªåããã¥ã¡ã³ããã¡ã€ã«ã®ããã»ããµã䜿çšããããã»ããµãå®çŸ©ããŸãã ãã®ããã»ããµã®æ¬è³ªã¯ãå¥ã®ãã£ãŒã«ãã®Base64æååããããã¹ããæœåºããããšã§ãã æ®ã£ãŠããã®ã¯ãã€ã³ããã¯ã¹äœæäžã«ãã¡ã€ã«ããBase64æååãååŸããŠãããã³ã°ãéå§ããããšã ãã§ãã ããã»ããµã§ã¯ããã¡ã€ã«ãå«ããã£ãŒã«ã ïŒ Field ïŒãšããã¹ããé 眮ããå ŽæïŒ TargetFiled ïŒã瀺ããŸãã IndexedCharactersãèšå®ãããšãåŠçããããã¡ã€ã«ã®é·ããå¶éãããŸãïŒ-1ã«ããå¶éããªããªããŸãïŒã
// Ingest pipeline definition: for every element of the "Files" array, run the
// attachment processor on that element.
new PutPipelineRequest(pipelineName)
{
    Processors = new List<ProcessorBase>
    {
        new ForeachProcessor
        {
            // Array field to iterate; "_ingest._value" below refers to the current element.
            Field = "Files",
            Processor = new AttachmentProcessor
            {
                // Where the extracted text is written on the current element.
                TargetField = "_ingest._value.attachment",
                // Source field; the article states it holds the file as a Base64 string.
                Field = "_ingest._value.RawContent",
                // -1 removes the limit on how much of the file is processed (per the article text).
                IndexedCharacters = -1
            }
        }
    }
};
インデックス作成
ãµãŒãã¹ã®ã¿ã¹ã¯ã¯ããã¥ãŒããæ°ãããªããžã§ã¯ããç¶ç¶çã«æœåºããé¢é£ããããã¥ã¡ã³ãã«ã€ã³ããã¯ã¹ãä»ããããšã§ãã ãã®ããã»ã¹ã§ã¯ãNESTãªããžã§ã¯ãã¢ãã«ã§ã¯ãªããäœã¬ãã«ã®ElasticsearchNetã©ã€ãã©ãªã䜿çšããŸãã JSONãä»ããŠããŒã¿ããŒã¹ã€ã³ã¿ãŒãã§ã€ã¹ãæäŸããŸãã ããã¥ã¡ã³ãã®éå±€æ§é ã®æ·±ãããã©ããŒã¹ããããšã«ããããªããžã§ã¯ããåçã«åœ¢æããŸãã ããç¥ãããŠããNewtonsoftJsonã©ã€ãã©ãªãããã«äœ¿çšãããŸãã
// Index one document through the low-level client; `json` is the already
// serialized document body, addressed by index name, type name and document id.
client.LowLevel.IndexPut<string>(indexName, typeName, documentId, json);
ã€ã³ããã¯ã¹äœæã¯ãåããã¥ã¡ã³ãã®äžŠååŠçã§ãã«ãã¹ã¬ããã§å®è£ ãããŸãã JSONãçæããããã»ã¹ã¯ãJSONãã€ã³ããã¯ã¹åãããããæ¡éãã«é·ãããããŸãã ãããã£ãŠãAPIã¯ãããã¥ã¡ã³ãã®é åã1åã®åŒã³åºãã§ESã«ããŒããããBulk APIã§ã¯ãªããåã ã®ããã¥ã¡ã³ãã®ã€ã³ããã¯ã¹äœæã«äœ¿çšãããŸãã ãã®å Žåãã€ã³ããã¯ã¹ã¯æ倧ã®ããã¥ã¡ã³ãã®JSONçæã¬ãŒãã§çºçããŸãã
ファイルのインデックス付け
ãã¡ã€ã«ã¯ãJSONãªããžã§ã¯ãã®äžéšãšããŠæ®ãã®ããŒã¿ãšãšãã«ã€ã³ããã¯ã¹ä»ããããŸãã å¿ èŠãªã®ã¯ããã€ãã¹ããªãŒã ãBase64æååã«å€æããããšã ãã§ãã ããã¯ãæšæºã©ã€ãã©ãªã䜿çšããŠè¡ãããŸãã ããã«ããã¡ã€ã«ãããã»ããµãŒå®çŸ©ã«è©²åœããå¿ èŠããããŸãã ããããªããšãéæ³ã¯çºçãããéåžžã®Base64æååã®ãŸãŸã«ãªããŸãã ã€ã³ããã¯ã¹äœææã«ãã€ãã©ã€ã³ã䜿çšããã«ã¯ãã¡ãœããåŒã³åºããå€æŽããŸãã
// Same single-document index call, but with the ingest pipeline name passed as a
// request parameter so the document goes through that pipeline when indexed.
client.LowLevel.IndexPut<string>(indexName, typeName, documentId, json, parameters => parameters.Pipeline(pipelineName));
自動補完
オートコンプリート（自動補完）は、ユーザーが入力した行の続きの候補を示唆します。
ãã®å ŽåããªãŒãã³ã³ããªãŒãã¯ã管çè ãŠãŒãã£ãªãã£ã§å¯Ÿå¿ãããã©ã°ãèšå®ãããŠããããã¹ããã£ãŒã«ãã§æ©èœããã¯ãã§ãã ãããã³ã°ãããŒããã段éã§ããã¹ãŠã®è£å®çãªè¡ã«å¯ŸããŠåå¥ã®ã€ã³ããã¯ã¹ãäœæãããŸãã ããã¯ãæ€çŽ¢ãè€æ°ã®ã€ã³ããã¯ã¹ã§æ©èœããå¿ èŠããããšããäºå®ã«ãããã®ã§ãã ãããã³ã°ã¯ãç¹å¥ãªã¿ã€ãã®è£å®ãã£ãŒã«ãã§åœ¢æãããŸãã
// Mapping for the completion field named "autocomplete".
// The "simple" analyzer is set for both indexing and searching.
var completionProperty = new CompletionProperty { Name = "autocomplete", Analyzer = "simple", SearchAnalyzer = "simple" };
ããã¥ã¡ã³ãã®ã€ã³ããã¯ã¹ãäœæãããšããèªåè£å®ã«å¿ èŠãªããã¹ãã¯çšèªã®ã»ããã«åå²ãããã€ã³ããã¯ã¹ã«ããŒããããŸãã çšèªã¯æ£èŠè¡šçŸãæºãããŠããå¿ èŠããããŸããçããããæåã®ã¿ã§æ§æãããåèªã匷調ããããšãéèŠã§ãã ã»ããã¯1ã€ã®çšèªã ãé 次ã·ããããããããååèªã«å¯ŸããŠãã€ã³ããã¯ã¹ã§å§ãŸãè¡ããããŸãã ã»ããã®é·ãã¯äžããå¶éãããŠããããã4ã«çããcompleteSizeã䜿çšããŸãã
// Splits `text` into words with `pattern` and, for every word position, builds
// a phrase of up to `completeSize` consecutive words starting at that position.
// `pattern`, `text` and `completeSize` are defined outside this snippet.
var regex = new Regex(pattern, RegexOptions.Compiled);
var words = regex.Matches(text);

// Materialize the matches once instead of re-casting the MatchCollection on
// every loop iteration.
var allWords = words.OfType<Match>().ToArray();

for (var i = 0; i < allWords.Length; i++)
{
    // Sliding window: words i .. i + completeSize - 1 (shorter near the end).
    var inputWords = allWords.Skip(i).Take(completeSize).ToArray();
    var wordValues = inputWords.Select(x => x.Value).ToArray();
    var output = string.Join(" ", wordValues);
    // JSON
    // NOTE(review): the code that writes `output` into the document JSON is
    // elided in the article; the loop's closing brace below was swallowed by the
    // line comment when the snippet was collapsed onto one line.
}
æ€çŽ¢äžã«ããªãŒãã³ã³ããªãŒãã®åå¥ã®ãªã¯ãšã¹ããæ©èœããŠããŸãã åæåãå ¥åãããšã察å¿ãããµãã¹ããªã³ã°ã䜿çšããŠããŒã¿ããŒã¹ã«ã¢ã¯ã»ã¹ããŸãã Elasticsearchãžã®ãªã¯ãšã¹ãã¯jsonãªããžã§ã¯ãã§ãã ãªãŒãã³ã³ããªãŒããååŸããã«ã¯ããµãžã§ã¹ããããã¯ã®ã¿ãå¿ èŠã§ãã Completion Suggesterãå«ãŸããŠããããããã¬ãã£ãã¯ã¹ã§ãã°ããæ€çŽ¢ã§ããŸãã è£å®ãã£ãŒã«ãã§ã®ã¿æ©èœããŸãã ã¿ã€ããã¹ã«ã€ããŠè©±ãåããšããä»ã®ãµã°ã¹ã¿ãŒãšäŒããŸãã
{ "suggest": { "completion_suggest": { "text": " ", "completion": { "field": "autocomplete", "size": 10 } } } }
検索
ã€ã³ã¿ãŒãã§ã€ã¹ã®åºæ¬éšåã¯æ€çŽ¢æååã§ãã ãŠãŒã¶ãŒãæåãå ¥åãããšããªãŒãã³ã³ããªãŒããšæ€çŽ¢ã®2ã€ã®èŠæ±ãæºããããŸãã ãããã®æåã®çµæã«åºã¥ããŠãå°å·ãç¶è¡ããããã«ããã³ãââãã衚瀺ããã2çªç®ã«åŸã£ãŠãããã¥ã¡ã³ãã®çºè¡ãæ§ç¯ãããŸãã æ€çŽ¢ã¯ãšãªã¯è€æ°ã®ãããã¯ã§æ§æãããåãããã¯ã¯ç°ãªãããããã£ãæ åœããŸãã
全文検索
ã¯ãšãªãããã¯ã¯ãã¯ãšãªã®æ€çŽ¢éšåã«å¯Ÿå¿ããŸãã 圌ã®ãããã§ãçºè¡ãããããã¥ã¡ã³ããéžæãããŸãã ä»ã®éèŠãªã¯ãšãªãããã¯ããããã®çµæã«é©çšãããŸãã ã¯ãšãªã¯ãããŒã«æŒç®ã䜿çšããŠæ¥ç¶ããããµãã¯ãšãªãæã€ããšãã§ããŸãã ãããè¡ãã«ã¯ã boolãããã¯ãå®çŸ©ããŸãã 4ã€ã®ã¿ã€ãã®æ¡ä»¶ãå«ããããšãã§ããŸãïŒ must ã filter ã must_not ã should ã ãã®ã¯ãšãªã§ã¯ãè«çORã«äžèŽããshouldæ¡ä»¶ã䜿çšããŸãã è€æ°ã®ãã«ããã¹ããµãã¯ãšãªãçµã¿åãããŸãã ãã£ã«ã¿ãŒãããã¯ã«æ»ããŸãããä»ã®ãšãããããã¥ã¡ã³ãã®ã»ããå šäœãæ¢ããŠãããšèããŠããŸãã
{ "query": { "bool": { "filter": [], "should": [ // ] } } // }
å šææ€çŽ¢ã®å Žåã multi_matchãããã¯ã䜿çšãããŸãã ã¯ãšãªãã©ã¡ãŒã¿ã®ããã¹ãã¯ã fieldsãã©ã¡ãŒã¿ã§æå®ãããããã€ãã®ãã£ãŒã«ãã§äžåºŠã«æ€çŽ¢ãããŸãã èŠæ±ãžã®å¿çãšããŠãããã¥ã¡ã³ãã®ãªã¹ããè¿ãããŸããåãªã¹ãã«ã¯ã¹ã³ã¢ããããŸãã ææžãã¯ãšãªã«äžèŽããã»ã©ããã®æ°å€ã¯é«ããªããŸãã multi_matchãªã¯ãšã¹ã㯠ãããã¹ããåäžã®ãã¬ãŒãºãšã¯èŠãªããŸããããåã ã®çšèªãæ€çŽ¢ããŸãã åæ§ã®ãããã¯ãè¿œå ããŸãããå¿ èŠãªæ©èœãå®è£ ãããã¬ãŒãºãã©ã¡ãŒã¿ãŒã䜿çšããŸãã äžèŽãããã¬ãŒãºãå«ãããã¥ã¡ã³ãã®å€ãé«ãããããã«ã boostãã©ã¡ãŒã¿ãŒãæå®ããŸãã ããã¥ã¡ã³ãã®ã¹ã³ã¢ã«æå®ãããæ°ãæããŸãã
{ "multi_match": { "query": " ", "fields": [ "FieldName" ] } }, { "multi_match": { "query": " ", "fields": [ "FieldName" ], "type": "phrase", "boost": 10 } }
èœãšãç©Žã®äžã§ããé åå ã®ãªããžã§ã¯ãéã®æ€çŽ¢ã«æ³šæããããšãã§ããŸãã ãããã³ã°ãäœæãããšãã«ãããã€ãã®ãã£ãŒã«ãããã¹ãããããã®ãšããŠããŒã¯ããŸãã ã ããã¯ããããããªããžã§ã¯ãã®é åã§ããããšãæå³ããŸãã ãããã®ãªããžã§ã¯ãã®ãã£ãŒã«ãã§æ€çŽ¢ããã«ã¯ã nestedãšåŒã°ããå¥ã®ãµãã¯ãšãªãå¿ èŠã§ãã ãã®äžã§ãé åãžã®ãã¹ ïŒ path ïŒãšãªã¯ãšã¹ãèªäœãæå®ããå¿ èŠããããŸãã 1ã€ã®ã€ã³ããã¯ã¹ãæ¢ããŠããå Žåãããã§ååã§ãã ãã ãããã®å Žåãæ€çŽ¢ã¯è€æ°ã®ã€ã³ããã¯ã¹ã§åæã«æ©èœãããã®ãããªãã¹ãã€ã³ããã¯ã¹ã®ãããã«ãååšããªãå ŽåãESã¯ãšã©ãŒãè¿ããŸãã ãããã£ãŠããã¹ããããã¯ãšãªã¯indecesãããã¯ã§å²ãŸããæ€çŽ¢ããã€ã³ããã¯ã¹ã瀺ãå¿ èŠããããŸãã æ®ãã®ã€ã³ããã¯ã¹ãæ€çŽ¢ãå¿ èŠãšããªãããšãæ確ã«ç€ºãããã«ãã no_match_query ãïŒã none ããšèšè¿°ããŸãã
{ "indices": { "index": "indexName", "query": { "nested": { "path": "PathToArray", "query": { "multi_match": { "query": " ", "fields": [ "PathToArray.FieldName" ] } } } }, "no_match_query": "none" } }
ハイライト
Elasticsearchは、リクエストに応じてドキュメント内で見つかったテキストを強調表示する優れた方法を提供します。
ãããè¡ãã«ã¯ãæ°ããèŠæ±ãããã¯ãè¿œå ããŸãïŒ highlight ãã£ãŒã«ãã«ããã€ã©ã€ããè¿ããã£ãŒã«ãããªã¹ãããŸãã pre_tagsããã³post_tagsãã©ã¡ãŒã¿ãŒã§ã¯ãåèªã匷調衚瀺ããæåãæå®ããŸãã æ€çŽ¢ã倧ããªããã¹ããã£ãŒã«ãïŒãã¡ã€ã«ãªã©ïŒã§æ©èœããå ŽåãElasticsearchã¯ããã£ãŒã«ãå šäœã§ã¯ãªããå°ããªéè·¯å ã§ãã€ã©ã€ããè¿ããŸãã ãã®çµæããšã³ãžã³ã¯ãã¹ãŠã®äœæ¥ãè¡ã£ãŠãããŸããããããã倪åã§åŒ·èª¿è¡šç€ºããã³ã³ããã¹ãã匷調衚瀺ããŸããã
{ "highlight": { "pre_tags": [ "<b>" ], "post_tags": [ "</b>" ], "fields": { "FieldName": {} } } }
ãã®æ©èœã®äŸ¿å©ãã«ããããããããã®æ©èœã䜿çšããéã«æ·±å»ãªåé¡ãçºçããŸããã ãã€ã©ã€ããé åå ã®ãªããžã§ã¯ãã«äœçšããå Žåããªã¯ãšã¹ããžã®å¿çã«ãã£ãŠãã©ã®ãªããžã§ã¯ãã«å±ããŠããããå€æããããšã¯ã§ããŸããã ãã¡ã€ã«ã¯é åã«æ ŒçŽããããã¡ã€ã«ã«ãã匷調衚瀺ã¯é¡§å®¢ã®éèŠãªèŠä»¶ã®1ã€ã§ãã ãã®åé¡ã®è«ççãªè§£æ±ºçã¯ã File_iãšãã圢åŒã®ãã£ãŒã«ãã®ã»ãããäœæããããšã§ããããã§ã iã¯åŠ¥åœãªéã®æ·»ä»ãã¡ã€ã«ãã«ããŒããŸãã 次ã«ã匷調衚瀺ããããšã«ããããã¡ã€ã«ãã©ã®ã€ã³ããã¯ã¹ãæã£ãŠããããæããã«ãªããæ€çŽ¢çµæãããã®ã€ã³ããã¯ã¹ã«ãã£ãŠãã¡ã€ã«åãååŸã§ããŸãã
ãããããã¹ãŠãããã»ã©åçŽã§ã¯ãªãããšãå€æããŸããã Base64æååããããã¹ããžã®å€æããã»ããµã¯ãåãååã®ãã£ãŒã«ããæã€é åã§ã®ã¿æ©èœããŸãã Discussion.elastic.coãã©ãŒã©ã ã®ãã«ãã®ãããã§ã 解決çãèŠã€ãããŸãããããã¹ãã«å€æããåŸããã£ãŒã«ãã®ååãç®çã®ãã©ãŒã ã«å€æŽããå¥ã®ããã»ããµãè¿œå ããŸãã CPUã³ãŒãïŒ
"script": { "lang": "painless", "inline": """for (def i = 0; i < ctx.Files.length; i++) { def f = 'File' + (i+1); ctx.Files[i][f] = ctx.Files[i].attachment; ctx.Files[i][f].Name = ctx.Files[i].Name; for (def rf : ['attachment', 'Name']) { ctx.Files[i].remove(rf); } }""" }
サジェスト
リクエストを入力するときに、ユーザーがタイプミスをすることがあります。 この場合、検索結果は空になります。 ただし、Elasticsearchはエラーの可能性を示唆する場合があります。
ãã®æ©èœã¯ã ææ¡ãããã¯ãä»ããŠå®è£ ãããŸãã ãªãŒãã³ã³ããªãŒãã«ã€ããŠèª¬æãããšãã«ãã§ã«åœŒãšäŒã£ãŠããŸããããã¿ã€ããã¹ãåŠçããããã«å¥ã®ã¿ã€ãã®æ²ãã身æ¯ãæã䜿çšãããŠããŸãã ãã¬ãŒãºãšåŒã°ããåã ã®åèªã§ã¯ãªããã¬ãŒãºå šäœãæå®ããŠãšã©ãŒãæ€çŽ¢ããŸãã çµæã®ãã³ãã®æ°ïŒ size ïŒãæ€çŽ¢ã®ãã£ãŒã«ã ïŒ field ïŒãããã³ãã¬ãŒãºã®å¯èœãªã¿ã€ããã¹ã®æ°ïŒ max_errors ïŒã瀺ããŸãã äžèŠãªçµæãã«ããããã«ã¯ãçµæã®sjestãå°ãªããšã1ã€ã®ã€ã³ããã¯ã¹ãã£ãŒã«ãã«å«ãŸããŠããããšã確èªãããµãã¯ãšãª ïŒ collatââe ïŒãè¿œå ããŸãã ããŒã«ãããã¯ãã¹ãã«ãã¹ã®åèªã匷調衚瀺ããçµã¿èŸŒã¿ã®åŒ·èª¿è¡šç€ºããµããŒãããŠããŸãã
{ "suggest": { "my_suggest": { "text": " ", "phrase": { "size": 1, "field": "_all", "max_errors": 4, "collate": { "query": { "inline": { "match": { "{{field_name}}": { "query": "{{suggestion}}", "operator": "and" } } } }, "params": { "field_name": "_all" } }, "highlight": { "pre_tag": "<b>", "post_tag": "</b>" } } } } }
ファセット
Elasticsearchã䜿çšããŠå®è£ ã§ãããã1ã€ã®èå³æ·±ãæ©èœã ãã¡ã»ããã¯ããªã³ã©ã€ã³ã¹ãã¢ã§ããèŠãããéèšãããã¯ã§ãã aggsãããã¯ããªã¯ãšã¹ãã«è¿œå ããŸãã ãã®ãããã¯ã«è¿œå ã§ããæãäžè¬çãªã¿ã€ãã®éçŽã¯ã termsãšåŒã°ããŸã ã çµæã«ã¯ã察å¿ãããã£ãŒã«ãã®äžæã®å€ãšãããããèŠã€ãã£ãããã¥ã¡ã³ãã®æ°ããã¹ãŠå«ãŸããŸãã éèšãããã¥ã¡ã³ãã®ã»ããå šäœã«é©çšãããã®ã§ã¯ãªããæ€çŽ¢ã¯ãšãªãæºããããã¥ã¡ã³ãã®ã¿ã«é©çšãããããšãéèŠã§ãã ãããã£ãŠãããã¹ããå ¥åãããšããã¡ã»ããã®ã³ã³ãã³ããåçã«å€æŽãããŸãã
{ "aggs": { "types": { "terms": { "field": "TypeField" } }, "min_date": { "min": { "field": "DateField" } } } }
ファセットを完全に実装するには、検索クエリにフィルタリングブロックを追加する必要があります。 これにより、ファセットで選択されたドキュメントパラメータのみに検索が制限されます。 filterブロックに、ファセットごとのサブクエリを追加します。
{ "terms" : { "TypeField" : [ /* */] } }, { "range" : { "DateField" : { "gte" : /* */ } } }
まとめ
これが全体像の見え方です。
インデックス統計
現在、パイロットバージョンでは次の構成が使用されています。
2台のサーバー：
CPU：Intel Xeon Platinum 8160（10コア）
RAM：40 GB
インデックスボリューム：260 GB
インデックス内のドキュメントの数：60万
インデックス作成速度：5000 doc / h