éåžžããã®åé¡ã¯ããã¡ãžãŒæ€çŽ¢ãŸãã¯é³å£°ã¢ã«ãŽãªãºã ã®äœ¿çšãšãã2ã€ã®æ¹æ³ã§è§£æ±ºã§ããŸãã æ ãè ã§ããããã¹ãŠã®ãã®ãç§ãã¡ã®åã«é·ãé
- pg_trgmã¯ãã©ã€ã°ã©ã ã§åäœããéšåæååããã³ãã¡ãžãŒæ€çŽ¢ã§æ€çŽ¢ã§ããŸãã ã€ã³ããã¯ã¹ãšããŠgistãšginã§åäœããŸã ã
- fuzzystrmatchã¯ãåèªãš3ã€ã®é³å£°ã¢ã«ãŽãªãºã ïŒ Soundex ã Metaphone ãããã³Double MetaphoneïŒéã®ã¬ãŒãã³ã·ã¥ã¿ã€ã³è·é¢ãã«ãŠã³ãã§ããŸãã èœãšãç©Žã¯ããŸãããã®ã¢ãžã¥ãŒã«ã®ã¬ãŒãã³ã·ã¥ã¿ã€ã³é¢æ°ã§ã¯ãä»»æã®æ€çŽ¢ã¯ãšãªã®ã€ã³ããã¯ã¹ãäœæã§ããªãããšã§ãã 第äºã«ããã¹ãŠã®é³å£°ã¢ã«ãŽãªãºã ã¯ã©ãã³ã¢ã«ãã¡ãããçšã«å®è£
ãããŠããŸãã
ãã®ç¹ã§ãç§ã¯ããããã軜ãåé¡ãã€ãŸãpg_trgmã¢ãžã¥ãŒã«ããã®è§£æ±ºçãæ¢ãå§ããŸããã
ãã©ã€ã°ã©ã
ã¢ãã«ãåçŽåããããã«ãæ£è IDã圌ã®å§ãåãããã³åŸæŽè ãå«ãæ å ±ããŒãã«ãæ€èšããŸãã gist / ginã€ã³ããã¯ã¹ãå¿ èŠãªã®ã§ãæåã«éèŠã§ãããäžå¿«ãªç¬éãç¥ãå¿ èŠããããŸãã1ã€ã®gist / ginã€ã³ããã¯ã¹-ããŒãã«ã®1åã§ãã ããšãã°ãè€æ°ã®åãé£çµããŠäœæããããšã¯ã§ããŸããã ãããã£ãŠãç«ã®äžã«äœæãããŸãïŒ
- pg_trgmæ¡åŒµ
- jsonbã®åœ¢åŒã§ååãæ ŒçŽããæ£è
ããŒãã«ïŒããŒã®ååšãšå
¥åã®ãã§ãã¯ä»ãïŒ
- ååãjsonbããããã¹ãã«å€æãããããªã°ã©ã ã€ã³ããã¯ã¹ãæ§ç¯ããããã®äžå€ã®é¢æ°
éå±ãªSQLã³ãŒã
create extension pg_trgm; create table info ( patid integer, fullname jsonb, constraint info_pk primary key (patid), constraint fullname_exists check ( fullname ? 'lname'::text and fullname ? 'fname'::text and fullname ? 'sname'::text ), constraint fullname_notnull check ( (fullname ->> 'lname'::text) is not null and (fullname ->> 'fname'::text) is not null ) ); create function fullname(in_fullname jsonb) returns text language plpgsql immutable as $$ begin return regexp_replace( lower( trim( coalesce(in_fullname->>'lname', '') || ' ' || coalesce(in_fullname->>'fname', '') || ' ' || coalesce(in_fullname->>'sname', '') ) ), '', '', 'g' ); exception when others then raise exception '%', sqlerrm; end; $$;
ãã«ããŒã ã®çŽ30äžä»¶ã®ã¬ã³ãŒããããŒãã«ã«æ¿å ¥ããŠç¶è¡ããŸãã
ãã©ã€ã°ã©ã ãšGIST
ãããã£ãŠããŸãããã©ã€ã°ã©ã ã«ãããã¡ãžãŒæ€çŽ¢ã®èŠæšã€ã³ããã¯ã¹ããã§ãã¯ããŸãã
ããã«éå±ãªSQLã³ãŒã
create index info_gist_idx on info using gist (fullname(fullname) gist_trgm_ops); CREATE INDEX Time: 15054,102 ms explain (analyze, buffers) select patid, fullname(fullname) <-> ' ' as dist from info order by dist limit 10; Limit (cost=0.28..4.35 rows=10 width=8) (actual time=157.378..157.688 rows=10 loops=1) Buffers: shared hit=5743 -> Index Scan using info_gist_idx on info (cost=0.28..126822.96 rows=312084 width=8) (actual time=157.371..157.655 rows=10 loops=1) Order By: (fullname(fullname) <-> ' '::text) Buffers: shared hit=5743 Planning time: 0.225 ms Execution time: 158.223 ms (7 rows)
ã€ã³ããã¯ã¹ã¯15ç§ã§äœæããããµã€ãºã¯45 MBãå ¥åãã¹ã®ããäžå®å šãªååã«ããæ€çŽ¢-158ããªç§ã§ãã
ãã©ã€ã°ã©ã ãšGIN
次ã«ããã¡ãžãŒããªã°ã©ã æ€çŽ¢ã®ãžã³ã€ã³ããã¯ã¹ãæ€èšããŸãã
以åã®SQLã¹ãã€ã©ãŒã¯éå±ã ãšæããŸããïŒ
create index info_trgm_idx on info using gin(fullname(fullname) gin_trgm_ops); CREATE INDEX Time: 10163,401 ms explain (analyze, buffers) select patid, similarity(fullname(fullname), ' ' ) as sml from info where true and fullname(fullname) % ' ' order by sml desc limit 10; Limit (cost=1180.22..1180.25 rows=10 width=8) (actual time=133.086..133.117 rows=8 loops=1) Buffers: shared hit=5741 -> Sort (cost=1180.22..1181.00 rows=312 width=8) (actual time=133.080..133.090 rows=8 loops=1) Sort Key: (similarity(fullname(fullname), ' '::text)) DESC Sort Method: quicksort Memory: 25kB Buffers: shared hit=5741 -> Bitmap Heap Scan on info (cost=26.70..1173.48 rows=312 width=8) (actual time=132.828..133.048 rows=8 loops=1) Recheck Cond: (fullname(fullname) % ' '::text) Heap Blocks: exact=7 Buffers: shared hit=5741 -> Bitmap Index Scan on info_gist_idx (cost=0.00..26.62 rows=312 width=0) (actual time=132.699..132.699 rows=8 loops=1) Index Cond: (fullname(fullname) % ' '::text) Buffers: shared hit=5734 Planning time: 0.573 ms Execution time: 133.225 ms (15 rows)
ã€ã³ããã¯ã¹äœæ10ç§ããµã€ãº18 MBãã¿ã€ããã¹ã§äžå®å šãªååãæ€çŽ¢-133 msã
æ£çŽã«èšããšãçµæã¯ãŸããŸãã§ã-ç§ã®ã©ãããããã«ã¯ãæ å ã®éœåžæ·±Shenzhenã®äžåœè£œã®ssdãã£ã¹ã¯ããããŸãã ãã®ããããã¡ãžãŒæ€çŽ¢ãšå šææ€çŽ¢ãçµã¿åãããŠããã»ã¹ã®é«éåãè©Šã¿ãŸãã
ãã©ã€ã°ã©ã ãšå šææ€çŽ¢
ã¢ã€ãã¢ã¯éåžžã«åçŽã§ã-å§ãååãæ称ã®ãã¹ãŠã®ã¹ãã«ãããå¥åã®ããŒãã«èŸæžãåéããŸãã æåã«ãå ¥åæ€çŽ¢æååãããŒã¯ã³ã«åãåãããã¡ãžãŒæ€çŽ¢ã§ãã£ã¯ã·ã§ããªããŒãã«å ã®åããŒã¯ã³ãæ¢ããããããåããŒã¯ã³ã®ãã¹ãŠã®å¯èœãªã¹ãã«ããªãšãŒã·ã§ã³ãéžæããtsqueryã«å ¥ããŠãinfoããŒãã«ã®tsvectorã€ã³ããã¯ã¹ã§å šææ€çŽ¢ãå®è¡ããŸãã ãã®ãã©ã³ã®å©ç¹ã¯ããã©ã€ã°ã©ã ã«ãããã¡ãžãŒæ€çŽ¢ã®é床ãè¡ã®å¹ ãšããã¹ãã®ããåã®çªå·ã«äŸåããããšã§ãã æããã«ããã«ããŒã èŸæžã¯infoããŒãã«ã®å ã®åãããã³ã³ãã¯ãã«ãªããããæ€çŽ¢ãé«éã«ãªããŸãã ãã®æ¹æ³ã«ã¯1ã€ã ãæ¬ ç¹ããããŸããæ°ããæ£è ãè¿œå ãããã³ã«ãååã®ããŒã¯ã³ãèŠã€ãããªãå Žåã¯èŸæžãæŽæ°ããå¿ èŠããããŸãã æ€èšŒã®ããã«ããœãŒã¹rumããã€ã³ããã¯ã¹ãåéããŠãinfoããŒãã«ã®ååã§tsvectorã€ã³ããã¯ã¹ãäœæããå¿ èŠããããŸãã ã©ã ã¯ãžã³ã€ã³ããã¯ã¹ã®ä¿®æ£ããŒãžã§ã³ã§ããããªãŒãã«è¿œå æ å ±ãä¿åããŸãã ãã®å Žåãrum_tsvector_opsæŒç®åã¯ã©ã¹ã䜿çšããŸããããã¯ãã€ã³ããã¯ã¹å ã®ããŒã¯ã³ã«é¢ããäœçœ®æ å ±ãæ ŒçŽããŸãã ãããã£ãŠãginãšã¯ç°ãªãã次ã®åœ¢åŒã®ã€ã³ããã¯ã¹ã®ã¿ã®tsqueryã¯ãšãªã䜿çšã§ããŸãã
ã¿ãã«å ã®ããŒã¯ã³ã®é åºã«é¢ãã詳现ã«ã€ããŠã¯ãè¡šã«ç§»åããŸããã ããã«ãginã®æšå¥šäºé ã¯ãtsvectoråã®ç©ççãªååšã§ããããã¯ãã¿ãã«ãžã®ãã¹ãŠã®èŠã€ãã£ããã€ã³ã¿ãŒãããŒãã«ã§ããã«ãã§ãã¯ããå¿ èŠãããããã§ãã ãŸããtsvectoråãç©ççã«ãªãå ŽåïŒã€ã³ããã¯ã¹çšã®é¢æ°ã䜿çšããŠäœæããå ŽåïŒãåã¿ãã«ã«å¯ŸããŠè¿œå ã®tsvectorèšç®ãå®è¡ããå¿ èŠããããŸãã äžè¬ã«ããã®ç©èªã®ã©ã é ã¯ã¯ããã«çç£çã§ãã(||)<->()<->()
éå±ãªSQLã®äžçã§ãšãã¬ã¹ã
create extension rum; create index info_rum_idx on info using rum ( to_tsvector('simple'::regconfig, fullname(fullname)) rum_tsvector_ops ); CREATE INDEX Time: 7.545s (7 seconds) create table patname ( lex text, constraint patname_uniq_idx unique (lex) ); create index patname_fuzzy_idx on patname using gin (lex gin_trgm_ops); CREATE INDEX Time: 0.596s insert into patname (lex) select word from ts_stat($$ select to_tsvector('simple', fullname(fullname)) from info $$); explain (analyze, buffers) with fio as ( select lexeme as lex, positions[1] as pos from unnest(to_tsvector('simple',' ')) ), query as( select to_tsquery('simple', string_agg(q.tq,'&')) as q from ( select f.pos, '('||string_agg(p.lex,'|')||')' as tq from fio as f join patname as p on p.lex % f.lex group by f.pos ) as q ) select to_tsvector('simple'::regconfig, fullname(fullname)) <=> (select q from query) as rank, * from info where to_tsvector('simple'::regconfig, fullname(fullname)) @@ (select q from query) order by to_tsvector('simple'::regconfig, fullname(fullname)) <=> (select q from query); Sort (cost=6453.71..6457.61 rows=1560 width=100) (actual time=68.201..68.202 rows=1 loops=1) Sort Key: ((to_tsvector('simple'::regconfig, fullname(info.fullname)) <=> $3)) Sort Method: quicksort Memory: 25kB Buffers: shared hit=536 CTE fio -> Function Scan on unnest (cost=0.00..0.10 rows=10 width=34) (actual time=0.023..0.034 rows=3 loops=1) CTE query -> Aggregate (cost=1484.60..1484.86 rows=1 width=32) (actual time=11.829..11.830 rows=1 loops=1) Buffers: shared hit=325 -> HashAggregate (cost=1484.30..1484.48 rows=10 width=34) (actual time=11.640..11.644 rows=2 loops=1) Group Key: f.pos Buffers: shared hit=325 -> Nested Loop (cost=16.58..1480.53 rows=755 width=19) (actual time=2.940..11.442 rows=62 loops=1) Buffers: shared hit=325 -> CTE Scan on fio f (cost=0.00..0.20 rows=10 width=34) (actual time=0.028..0.053 rows=3 loops=1) -> Bitmap Heap Scan on patname p (cost=16.58..147.28 rows=75 width=17) (actual time=1.905..3.717 rows=21 loops=3) Recheck Cond: (lex % f.lex) Rows Removed by Index Recheck: 321 Heap Blocks: exact=275 Buffers: shared hit=325 -> Bitmap Index Scan on patname_fuzzy_idx (cost=0.00..16.57 rows=75 width=0) (actual time=1.277..1.277 rows=342 loops=3) Index Cond: (lex % f.lex) Buffers: shared hit=50 InitPlan 3 (returns $3) -> CTE Scan on query (cost=0.00..0.02 rows=1 width=32) (actual time=0.004..0.006 rows=1 loops=1) InitPlan 4 (returns $4) -> CTE Scan on query query_1 (cost=0.00..0.02 rows=1 width=32) (actual time=11.834..11.839 rows=1 loops=1) Buffers: shared hit=325 -> Bitmap Heap Scan on info (cost=31.99..4885.97 rows=1560 width=100) (actual time=68.184..68.187 rows=1 loops=1) Recheck Cond: (to_tsvector('simple'::regconfig, fullname(fullname)) @@ $4) Heap Blocks: exact=1 Buffers: shared hit=536 -> Bitmap Index Scan on info_rum_idx (cost=0.00..31.60 rows=1560 width=0) (actual time=67.847..67.847 rows=1 loops=1) Index Cond: (to_tsvector('simple'::regconfig, fullname(fullname)) @@ $4) Buffers: shared hit=517 Planning time: 5.012 ms Execution time: 68.583 ms (37 rows)
åèšã§ãå šææ€çŽ¢ã€ã³ããã¯ã¹ã7ç§éïŒãµã€ãº13 MBïŒäœæãããããŒã¯ã³èŸæžã€ã³ããã¯ã¹ã0.6ç§ïŒãµã€ãº5.8 MBïŒã§äœæãããæ€çŽ¢ã¯68ããªç§ã§ããã æ¬ ç¹ã®ãã¡ãéžææ§ã¯ä»¥åã®ãªãã·ã§ã³ãããå£ã£ãŠããŸãã
é³å£°ã¢ã«ãŽãªãºã
pg_trmgã¢ãžã¥ãŒã«ã®ãã¡ãžãŒæ€çŽ¢ãªãã·ã§ã³ãè©ŠããåŸãfuzzystrmatchãããäžåºŠèŠãããšã«ããŸããã ã¬ãŒãã³ã·ã¥ã¿ã€ã³é¢æ°ã«ã€ã³ããã¯ã¹ãä»ããæ¹æ³ã¯æãã€ããŸããã§ããããé³å£°ã¢ã«ãŽãªãºã ã¯éåžžã«èå³æ·±ããã®ã§ããã åè¿°ã®ããã«ãPostgreSQLã§ã¯ãè¡šé³é¢æ°ã¯ã©ãã³ã¢ã«ãã¡ãããã«å¯ŸããŠã®ã¿å®è£ ãããè±èªã®ååã«å¯ŸããŠæçãããŸãã ãã·ã¢ã®å®è£ ãã€ã³ã¿ãŒãããã§æ€çŽ¢ããçµæãHabrã®çŽ æŽãããèšäºã«ç§»åããŸããããã®èšäºã§ã¯ããã·ã¢èªã®ååïŒãã·ã¢èªã®æåã§æ§æãããïŒã®Metaphoneã¢ã«ãŽãªãºã ã«ã€ããŠèª¬æããŠããŸãã æ²ããã£ãã®ã¯1ã€ã ãã§ã-åçŽã§ãããplpgsqlã§ãã®ããžãã¯ãå®è£ ããããPythonã§äœããå®è£ ããã®ã¯ãªããšãªãæ²ããããšã§ãã...ãããŠãplpython3uã¯å®å šã§ã¯ãªãããšãæãåºããŸããpostgresããã»ã¹ã®æš©éãæã€ãã¡ã€ã«ã·ã¹ãã ïŒãPostgreSQLã§å®å šã«æ©èœããèšèªã ãããŠãããã䜿ããªãã®ã¯çœªã§ãã ãããã£ãŠã2ã€ã®äžå€ã®é¢æ°ãäœæããŸããã
- plpython3uã®é³çŽ ãHabréã®èšäºã®ã¢ã«ãŽãªãºã ã«åŸã£ãŠãããŒã¯ã³ãé³çŽ ã«å€æããŸãïŒãsmirnovãããsmirnafãã«å€æïŒã
- plpgsqlã®metaphoneã1ã€ã®ããŒã¯ã³ãé³çŽ ã«å€æããã ãã§ãªããããã¹ãå
šäœãé³çŽ ã»ããã«å€æããŸãã å®éãããã¯é³çŽ é¢æ°ã®åãªããã€ã³ãã£ã³ã°ã§ãã
Metaphoneãšbtree
次ã«ãmetaphoneé¢æ°ã§éåžžã®btreeã€ã³ããã¯ã¹ãäœæããé床ãè©äŸ¡ããŠã¿ãŠãã ããã
éãéãããé¢çœããã®ã¯äœããªã
create or replace function phoneme (in_lexeme text) returns text language plpython3u immutable as $$ import re class Lexeme: def __init__(self, body): """ :type body: str """ self.body = body.lower().strip() # self._vowels = {"(?:|||)": "", "[]": "", "[]": "", "[]": ""} # self._consonants = {"": "", "": "", "": "", "": ""} # , _deafening_chars self._deafening_chars = ["", "", "", "", "", "", ""] # self._removable_chars = {"[]": ""} def _remove_double_chars(self): return Lexeme("".join((char for num, char in enumerate(self.body) if char != self.body[num - 1]))) def _deafen_consonants(self): modified_body = "" for num, char in enumerate(self.body): if char in self._consonants and ( num < len(self.body) - 1 and self.body[num + 1] in self._deafening_chars or num == len(self.body) - 1 ): modified_body += self._consonants[char] else: modified_body += char return Lexeme(modified_body) @staticmethod def _regexp_replace(text, char_dict): modified_body = text for item in char_dict: modified_body = re.sub(item, char_dict[item], modified_body) return Lexeme(modified_body) def _replace_vowels(self): return self._regexp_replace(self.body, self._vowels) def _remove_chars(self): return self._regexp_replace(self.body, self._removable_chars) def metaphone(self): return self._remove_chars()._replace_vowels()._deafen_consonants()._remove_double_chars().body return Lexeme(in_lexeme).metaphone() $$; create or replace function metaphone (in_phonemes text) returns text language plpgsql immutable as $$ begin return ( select string_agg(q.lex,' ') from ( select phoneme(lexeme) as lex from unnest(to_tsvector('simple', in_phonemes)) order by positions ) as q ); exception when others then raise '%', SQLERRM using errcode = SQLSTATE; end; $$; create index info_metaphone_idx on info ( metaphone(fullname(fullname)) text_pattern_ops ); CREATE INDEX Time: 114.757s (a minute) explain (analyze, buffers) select patid, fullname from info where metaphone(fullname(fullname)) like regexp_replace(metaphone(' '),'\s','%','g')||'%' limit 10; Limit (cost=76.03..1388.96 rows=10 width=96) (actual time=22.452..129.944 rows=3 loops=1) Buffers: shared hit=239 -> Bitmap Heap Scan on info (cost=76.03..4146.10 rows=31 width=96) (actual time=22.447..129.927 rows=3 loops=1) Filter: (metaphone(fullname(fullname)) ~~ '%%%'::text) Rows Removed by Filter: 244 Heap Blocks: exact=234 Buffers: shared hit=239 -> Bitmap Index Scan on info_metaphone_idx (cost=0.00..76.02 rows=1560 width=0) (actual time=0.061..0.061 rows=247 loops=1) Index Cond: ((metaphone(fullname(fullname)) ~>=~ ''::text) AND (metaphone(fullname(fullname)) ~<~ ''::text)) Buffers: shared hit=5 Planning time: 1.012 ms Execution time: 129.977 ms (12 rows) Time: 131,802 ms
ã€ã³ããã¯ã¹ã¯114ç§éäœæããããµã€ãºã¯22 MBïŒããã©ãŒãã³ã¹ã®èŠ³ç¹ããPythonã§æãæé©ãªé¢æ°ãèšè¿°ããŠããªãããã§ãïŒããªã¯ãšã¹ãã¯131ããªç§ã§ãã ã€ã³ããã¯ã¹ã¯éšåæååã®ããäžéšã§ã®ã¿æ©èœãããã£ã«ã¿ãŒã¯ãïŒ ãã®ããã«æ©èœããŸãã æ®å¿µã ã
Metaphoneãšãã©ã€ã°ã©ã
plpython3uã§äœæãããmetaphoneé¢æ°ã«åºã¥ããŠããã©ã€ã°ã©ã ã€ã³ããã¯ã¹ãæ§ç¯ããŠã¿ãŸãããã ãã ãããã¡ãžãŒæ€çŽ¢ã§ã¯ãªããéšåæååã®æ€çŽ¢ã«äœ¿çšããŸãã
ã䜿çšããŠã¯ãšãªæéã®æ°éçæ³ãæžããæ¹æ³...
create index info_metaphone_trgm_idx on info using gin (metaphone(fullname(fullname)) gin_trgm_ops); CREATE INDEX Time: 124.713s (2 minutes) explain (analyze, buffers) select patid, fullname from info where metaphone(fullname(fullname)) like '%'||regexp_replace(metaphone(' '),'\s','%','g')||'%' limit 10; Limit (cost=92.24..134.98 rows=10 width=96) (actual time=9.562..10.638 rows=3 loops=1) Buffers: shared hit=103 -> Bitmap Heap Scan on info (cost=92.24..224.74 rows=31 width=96) (actual time=9.554..10.617 rows=3 loops=1) Recheck Cond: (metaphone(fullname(fullname)) ~~ '%%%%'::text) Heap Blocks: exact=2 Buffers: shared hit=103 -> Bitmap Index Scan on info_metaphone_trgm_idx (cost=0.00..92.23 rows=31 width=0) (actual time=8.354..8.354 rows=3 loops=1) Index Cond: (metaphone(fullname(fullname)) ~~ '%%%%'::text) Buffers: shared hit=101 Planning time: 2.029 ms Execution time: 10.726 ms (11 rows) Time: 14,480 ms
ã€ã³ããã¯ã¹äœææé-124ç§ããµã€ãº15 MbïŒããã«ã¡ã¯ãç§ã®æ²ãã£ãæãšplpython3uïŒãæ€çŽ¢-14 msã
ãŸãšã
ããŸããŸãªã¿ã€ããã¹æ€çŽ¢ãªãã·ã§ã³ããŸãšããŸããã
æŽæ°1ïŒ movEAXããMetaphone å®è£ ãè¿œå ããŸãã ã
æŽæ°2ïŒ Ivan Milovanovããplpgsqlã«Metaphoneå®è£ ãè¿œå ãããŸããïŒtelegram-milovanovïŒ
plpgsqlã®Metaphone
create or replace function phoneme (in_lexeme text) returns text language plpgsql immutable as $$ declare res varchar(100) DEFAULT ''; begin res := lower(in_lexeme); res := regexp_replace(res,'[]','','g'); res := regexp_replace(res,'(|||)','','g'); res := regexp_replace(res,'[]','','g'); res := regexp_replace(res,'[]','','g'); res := regexp_replace(res,'','','g'); res := regexp_replace(res,'([]|$)','\1','g'); res := regexp_replace(res,'([]|$)','\1','g'); res := regexp_replace(res,'([]|$)','\1','g'); res := regexp_replace(res,'([]|$)','\1','g'); res := regexp_replace(res,'([]|$)','\1','g'); res := regexp_replace(res,'','','g'); res := regexp_replace(res,'','','g'); res := regexp_replace(res,'(.)\1','\1','g'); return res; exception when others then raise exception '%', sqlerrm; end; $$;
æ€çŽ¢ã®çš®é¡
| ã€ã³ããã¯ã¹äœææé
| ã€ã³ããã¯ã¹ãµã€ãº
| ã¿ã€ããã¹æ€çŽ¢é床
| åè
|
ãã©ã€ã°ã©ã ã®èŠç¹
| 15ç§
| 45 Mb
| 158ããªç§
| |
ãžã³ãã©ã€ã°ã©ã
| 10ç§
| 18 Mb
| 133ããªç§
| |
ãã©ã€ã°ã©ã ãšå
šææ€çŽ¢
| 7.6ç§
| 18.8 Mb
| 68ããªç§
| ããã«æªãéžææ§ãããŒã¯ã³ã®èŸæžãç¶æããå¿
èŠããããŸã
|
Metaphone btree
| 114ç§
| 22 Mb
| 131ããªç§
| å®å
šã§ãªãèšèªplpython3u
|
Metaphone Trigram
| 124ç§
| 15 Mb
| 14ããªç§
| å®å
šã§ãªãèšèªplpython3u
|
movEAXããMetaphone Trigramã å®è£
ãã
| 77.8ç§
| 16 Mb
| 14ããªç§
| å®å
šã§ãªãèšèªplpython3u
|
plpgsqlã§ã®Ivan Milovanovã®å®è£
| 72.0ç§
| 16 Mb
| 14ããªç§
|
æŽæ°3ïŒã€ã³ããã¯ã¹ã«ãsmirnaf dinis anatalievichããå«ãŸããå Žåã ããã«ããŒã ã® ãinãã®æåã¯èãããŸããïŒãã®åŸã«æ¯é³ãããããïŒã éšåæååã¡ã¿ãã©ã³ïŒ 'anatolia'ïŒãèŠããšãæåãcãã¯æ¯é³ã®äžã«ãããŸããããæåŸã«ã¯atç¶ãšããŸãã ãã®åé¡ãåé¿ããããã«ã mqueryé¢æ°ã以äžã«èšè¿°ããæ€çŽ¢ã¯åŒã«ãã£ãŠå®è¡ãããŸã
select metaphone(' ') similar to mquery(' ');
Mqeryé¢æ°
create or replace function mquery(in_fullname text) returns text language plpgsql immutable as $$ declare res text; begin res := metaphone(in_fullname); res := regexp_replace(res, '(|)', '(|)', 'g'); res := regexp_replace(res, '(|)', '(|)', 'g'); res := regexp_replace(res, '(|)', '(|)', 'g'); res := regexp_replace(res, '(|)', '(|)', 'g'); res := regexp_replace(res, '(|)', '(|)', 'g'); res := regexp_replace(res, '\s', '%', 'g'); return '%'||res||'%'; exception when others then raise exception '%', sqlerrm; end; $$;
ç§ã®å Žåãã·ã¹ãã ã¯æžã蟌ã¿ã§ã¯ãªãèªã¿åãã«çŠç¹ãåœãŠãããïŒæ倧ã§1åéã«æ£è ã®ãã¢ãè¿œå ããïŒãç§ã®ãªãã·ã§ã³ã¯Trigramä»ãã®Metaphoneã§ãã Pythonã®é¢æ°ãé床ã®èŠ³ç¹ããæé©åããæ¹æ³ã«ã€ããŠèª°ããã¢ã€ãã¢ãæã£ãŠãããªããã³ã¡ã³ãã®è³Œèªãäžæ¢ãããã¹ãã«ããŒã¿ãè¿œå ããŸãã