Optimizations with Full-Text Search in PostgreSQL

Example 1. Full-Text Search: “Does Not Contain” Query

postgres=# create table notcontain (id int, info tsvector);  
CREATE TABLE
CREATE OR REPLACE FUNCTION   
gen_rand_str(integer)
RETURNS text
LANGUAGE sql
STRICT
AS $function$
select string_agg(a[(random()*6)::int+1],'') from generate_series(1,$1), (select array['a','b','c','d','e','f',' ']) t(a);
$function$;
postgres=# insert into notcontain select generate_series(1,1000000), to_tsvector(gen_rand_str(256));
create index idx_notcontain_info on notcontain using gin (info);
postgres=# select * from notcontain limit 1;  
-[ RECORD 1 ]----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
id | 1
info | 'afbbeeccbf':3 'b':16 'bdcdfd':2 'bdcfbcecdeeaed':8 'bfedfecbfab':7 'cd':9 'cdcaefaccdccadeafadededddcbdecdaefbcfbdaefcec':14 'ceafecff':6 'd':17,18 'dbc':12 'dceabcdcbdca':10 'dddfdbffffeaca':13 'deafcccfbcdebdaecda':11 'dfbadcdebdedbfa':19 'eb':15 'ebe':1 'febdcbdaeaeabbdadacabdbbedfafcaeabbdcedaeca':5 'fedeecbcdfcdceabbabbfcdd':4
select * from notcontain t1 where info @@ to_tsquery ('!eb');  

postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where info @@ to_tsquery ('!eb');
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on postgres.notcontain t1 (cost=0.00..318054.51 rows=950820 width=412) (actual time=0.016..1087.463 rows=947911 loops=1)
Output: id, info
Filter: (t1.info @@ to_tsquery('!eb'::text))
Rows Removed by Filter: 52089
Buffers: shared hit=55549
Planning time: 0.131 ms
Execution time: 1134.571 ms
(7 rows)
postgres=# set enable_seqscan=off;  
SET

postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where info @@ to_tsquery ('!eb');
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on postgres.notcontain t1 (cost=82294981.00..82600120.25 rows=950820 width=412) (actual time=1325.587..1540.145 rows=947911 loops=1)
Output: id, info
Recheck Cond: (t1.info @@ to_tsquery('!eb'::text))
Heap Blocks: exact=55549
Buffers: shared hit=171948
-> Bitmap Index Scan on idx_notcontain_info (cost=0.00..82294743.30 rows=950820 width=0) (actual time=1315.663..1315.663 rows=947911 loops=1)
Index Cond: (t1.info @@ to_tsquery('!eb'::text))
Buffers: shared hit=116399
Planning time: 0.135 ms
Execution time: 1584.670 ms
(10 rows)

Example 2. Full-Text Search: “Does Not Contain” Query

postgres=# truncate notcontain ;  
TRUNCATE TABLE
postgres=# insert into notcontain select generate_series(1,1000000), to_tsvector('abc');
INSERT 0 1000000
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where info @@ to_tsquery ('!abc');  
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on postgres.notcontain t1 (cost=220432.15..220433.71 rows=1 width=21) (actual time=107.936..107.936 rows=0 loops=1)
Output: id, info
Recheck Cond: (t1.info @@ to_tsquery('!abc'::text))
Buffers: shared hit=268
-> Bitmap Index Scan on idx_notcontain_info (cost=0.00..220432.15 rows=1 width=0) (actual time=107.933..107.933 rows=0 loops=1)
Index Cond: (t1.info @@ to_tsquery('!abc'::text))
Buffers: shared hit=268
Planning time: 0.183 ms
Execution time: 107.962 ms
(9 rows)
postgres=# set enable_bitmapscan =off;  
SET
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where info @@ to_tsquery ('!abc');
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Seq Scan on postgres.notcontain t1 (cost=0.00..268870.00 rows=1 width=21) (actual time=1065.436..1065.436 rows=0 loops=1)
Output: id, info
Filter: (t1.info @@ to_tsquery('!abc'::text))
Rows Removed by Filter: 1000000
Buffers: shared hit=6370
Planning time: 0.059 ms
Execution time: 1065.449 ms
(7 rows)

Example 3. Ordinary B-Tree Indexes: “Not Equal To” Searches

postgres=# truncate notcontain ;  
TRUNCATE TABLE
postgres=# insert into notcontain select generate_series(1,1000000);
INSERT 0 1000000
postgres=# create index idx1 on notcontain (id);
CREATE INDEX
postgres=# set enable_bitmapscan =on;
SET
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where id<>1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------
Seq Scan on postgres.notcontain t1 (cost=0.00..16925.00 rows=999999 width=36) (actual time=0.011..110.592 rows=999999 loops=1)
Output: id, info
Filter: (t1.id <> 1)
Rows Removed by Filter: 1
Buffers: shared hit=4425
Planning time: 0.195 ms
Execution time: 156.013 ms
(7 rows)


postgres=# set enable_seqscan=off;
SET
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where id<>1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on postgres.notcontain t1 (cost=10000000000.00..10000016925.00 rows=999999 width=36) (actual time=0.011..110.964 rows=999999 loops=1)
Output: id, info
Filter: (t1.id <> 1)
Rows Removed by Filter: 1
Buffers: shared hit=4425
Planning time: 0.062 ms
Execution time: 156.461 ms
(7 rows)
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where not exists (select 1 from notcontain t2 where t1.id=t2.id and t2.id=1);  
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
Merge Anti Join (cost=0.85..25497.28 rows=999999 width=36) (actual time=0.023..277.639 rows=999999 loops=1)
Output: t1.id, t1.info
Merge Cond: (t1.id = t2.id)
Buffers: shared hit=7164
-> Index Scan using idx1 on postgres.notcontain t1 (cost=0.42..22994.22 rows=1000000 width=36) (actual time=0.009..148.520 rows=1000000 loops=1)
Output: t1.id, t1.info
Buffers: shared hit=7160
-> Index Only Scan using idx1 on postgres.notcontain t2 (cost=0.42..3.04 rows=1 width=4) (actual time=0.007..0.008 rows=1 loops=1)
Output: t2.id
Index Cond: (t2.id = 1)
Heap Fetches: 1
Buffers: shared hit=4
Planning time: 0.223 ms
Execution time: 322.798 ms
(14 rows)
postgres=# set enable_mergejoin=off;
SET
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where not exists (select 1 from notcontain t2 where t1.id=t2.id and t2.id=1);
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------
Hash Anti Join (cost=3.05..27053.05 rows=999999 width=36) (actual time=0.060..251.232 rows=999999 loops=1)
Output: t1.id, t1.info
Hash Cond: (t1.id = t2.id)
Buffers: shared hit=4432
-> Seq Scan on postgres.notcontain t1 (cost=0.00..14425.00 rows=1000000 width=36) (actual time=0.011..84.659 rows=1000000 loops=1)
Output: t1.id, t1.info
Buffers: shared hit=4425
-> Hash (cost=3.04..3.04 rows=1 width=4) (actual time=0.014..0.014 rows=1 loops=1)
Output: t2.id
Buckets: 1024 Batches: 1 Memory Usage: 9kB
Buffers: shared hit=4
-> Index Only Scan using idx1 on postgres.notcontain t2 (cost=0.42..3.04 rows=1 width=4) (actual time=0.010..0.011 rows=1 loops=1)
Output: t2.id
Index Cond: (t2.id = 1)
Heap Fetches: 1
Buffers: shared hit=4
Planning time: 0.147 ms
Execution time: 297.127 ms
(18 rows)

postgres=# set enable_seqscan=off;
SET
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from notcontain t1 where not exists (select 1 from notcontain t2 where t1.id=t2.id and t2.id=1);
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
Hash Anti Join (cost=3.48..35622.27 rows=999999 width=36) (actual time=0.036..324.401 rows=999999 loops=1)
Output: t1.id, t1.info
Hash Cond: (t1.id = t2.id)
Buffers: shared hit=7164
-> Index Scan using idx1 on postgres.notcontain t1 (cost=0.42..22994.22 rows=1000000 width=36) (actual time=0.017..149.383 rows=1000000 loops=1)
Output: t1.id, t1.info
Buffers: shared hit=7160
-> Hash (cost=3.04..3.04 rows=1 width=4) (actual time=0.011..0.011 rows=1 loops=1)
Output: t2.id
Buckets: 1024 Batches: 1 Memory Usage: 9kB
Buffers: shared hit=4
-> Index Only Scan using idx1 on postgres.notcontain t2 (cost=0.42..3.04 rows=1 width=4) (actual time=0.008..0.009 rows=1 loops=1)
Output: t2.id
Index Cond: (t2.id = 1)
Heap Fetches: 1
Buffers: shared hit=4
Planning time: 0.141 ms
Execution time: 369.749 ms
(18 rows)
postgres=# create  unlogged table ptbl(id int);  
CREATE TABLE
postgres=# insert into ptbl select generate_series(1,100000000);

postgres=# alter table ptbl set (parallel_workers =32);

\timing

Non-concurrently query:
postgres=# set max_parallel_workers_per_gather =0;
postgres=# select count(*) from ptbl where id<>1;
count
----------
99999999
(1 row)

Time: 11863.151 ms (00:11.863)

Concurrently query:
postgres=# set max_parallel_workers_per_gather =32;
postgres=# select count(*) from ptbl where id<>1;
count
----------
99999999
(1 row)

Time: 610.017 ms

Example 4. Ordinary Partial B-Tree Indexes: “Not Equal To” Searches

create table tbl (id int, info text, crt_time timestamp, c1 int);select * from tbl where c1<>1;insert into tbl select generate_series(1,10000000), 'test', now(), 1;
insert into tbl values (1,'abc',now(),2);
create index idx_tbl_1 on tbl(id) where c1<>1;
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where c1<>1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Index Scan using idx_tbl_1 on postgres.tbl (cost=0.12..1.44 rows=1 width=21) (actual time=0.015..0.015 rows=1 loops=1)
Output: id, info, crt_time, c1
Buffers: shared hit=1 read=1
Planning time: 0.194 ms
Execution time: 0.030 ms
(5 rows)

Summary

Original Source

--

--

--

Follow me to keep abreast with the latest technology news, industry insights, and developer trends. Alibaba Cloud website:https://www.alibabacloud.com

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

How to get VS Code working with GCP

Why Markdown is over hyped?

How to stay productive as a developer

Python Basics for Data Journalists Event

Website Vs Web Application

How is using a number in Python?

Control your kubernetes deployment with argo rollouts

Improving Reliability with Alibaba Cloud Message Service

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Alibaba Cloud

Alibaba Cloud

Follow me to keep abreast with the latest technology news, industry insights, and developer trends. Alibaba Cloud website:https://www.alibabacloud.com

More from Medium

Hands-On with REDIS | Part 2

Architecture Cassandra,Best Practices, Installation Cluster Multi DC With Ansible

How to Connect Elastic Sink Connector with Kafka

MongoDB Cluster Replication