How PostgreSQL UDF Can Accelerate the Performance of Compound Queries of Single- and Multi-value Fields

Image for post
Image for post

Background

create table tbl(gid int, c1 int[]);  


insert into tbl select random()*99, gen_randarr(999, 10) from generate_series(1,10000000);

Demo

CREATE OR REPLACE FUNCTION public.gen_randarr(integer, integer)  
RETURNS integer[]
LANGUAGE sql
STRICT
AS $function$
select array(select (random()*$1)::int from generate_series(1,$2));
$function$;
create table tbl(gid int, c1 int[]);
insert into tbl select random()*99, gen_randarr(999, 10) from generate_series(1,10000000);

Traditional Acceleration Method 1

create extension btree_gin;  
set maintenance_work_mem ='8GB';
create index idx_tbl_1 on tbl using gin (gid, c1);
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where gid=1 and c1 @> array[1,2,3];  
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=96.00..97.02 rows=1 width=65) (actual time=12.810..12.810 rows=0 loops=1)
Output: gid, c1
Recheck Cond: ((tbl.gid = 1) AND (tbl.c1 @> '{1,2,3}'::integer[]))
Buffers: shared hit=184
-> Bitmap Index Scan on idx_tbl_1 (cost=0.00..96.00 rows=1 width=0) (actual time=12.807..12.807 rows=0 loops=1)
Index Cond: ((tbl.gid = 1) AND (tbl.c1 @> '{1,2,3}'::integer[]))
Buffers: shared hit=184
Planning time: 0.154 ms
Execution time: 12.838 ms
(9 rows)
postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where gid=1 and c1 && array[1,2,3];
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=129.80..3433.25 rows=3297 width=65) (actual time=17.453..22.486 rows=2932 loops=1)
Output: gid, c1
Recheck Cond: ((tbl.gid = 1) AND (tbl.c1 && '{1,2,3}'::integer[]))
Heap Blocks: exact=2906
Buffers: shared hit=3089
-> Bitmap Index Scan on idx_tbl_1 (cost=0.00..128.97 rows=3297 width=0) (actual time=17.121..17.121 rows=2932 loops=1)
Index Cond: ((tbl.gid = 1) AND (tbl.c1 && '{1,2,3}'::integer[]))
Buffers: shared hit=183
Planning time: 0.223 ms
Execution time: 22.761 ms
(10 rows)

Traditional Acceleration Method 2

postgres=# drop index idx_tbl_1;  
DROP INDEX

postgres=# create index idx_tbl_1 on tbl (gid);
CREATE INDEX

postgres=# create index idx_tbl_2 on tbl using gin (c1);
explain (analyze,verbose,timing,costs,buffers) select * from tbl where gid=1 and c1 @> array[1,2,3];  

explain (analyze,verbose,timing,costs,buffers) select * from tbl where gid=1 and c1 && array[1,2,3];

postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where gid=1 and c1 @> array[1,2,3];
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=72.09..83.25 rows=1 width=65) (actual time=12.848..12.848 rows=0 loops=1)
Output: gid, c1
Recheck Cond: (tbl.c1 @> '{1,2,3}'::integer[])
Filter: (tbl.gid = 1)
Rows Removed by Filter: 13
Heap Blocks: exact=13
Buffers: shared hit=131
-> Bitmap Index Scan on idx_tbl_2 (cost=0.00..72.09 rows=11 width=0) (actual time=12.810..12.810 rows=13 loops=1)
Index Cond: (tbl.c1 @> '{1,2,3}'::integer[])
Buffers: shared hit=118
Planning time: 0.254 ms
Execution time: 12.874 ms
(12 rows)

postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where gid=1 and c1 && array[1,2,3];
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=3534.41..6837.86 rows=3297 width=65) (actual time=69.636..74.613 rows=2932 loops=1)
Output: gid, c1
Recheck Cond: ((tbl.gid = 1) AND (tbl.c1 && '{1,2,3}'::integer[]))
Heap Blocks: exact=2906
Buffers: shared hit=2982 read=279
-> BitmapAnd (cost=3534.41..3534.41 rows=3297 width=0) (actual time=69.002..69.002 rows=0 loops=1)
Buffers: shared hit=76 read=279
-> Bitmap Index Scan on idx_tbl_1 (cost=0.00..1089.93 rows=106333 width=0) (actual time=13.538..13.538 rows=100704 loops=1)
Index Cond: (tbl.gid = 1)
Buffers: shared read=279
-> Bitmap Index Scan on idx_tbl_2 (cost=0.00..2442.58 rows=310077 width=0) (actual time=50.878..50.878 rows=296887 loops=1)
Index Cond: (tbl.c1 && '{1,2,3}'::integer[])
Buffers: shared hit=76
Planning time: 0.147 ms
Execution time: 74.886 ms
(15 rows)

Acceleration Method that Improves the Performance by Over 100 Times

create or replace function gen_newarr(int, anyarray) returns text[] as $$  
declare
res text[] := '{}';
x int;
begin
foreach x in array $2 loop
res := array_append(res, $1||'_'||x);
end loop;
return res;
end;
$$ language plpgsql strict immutable;




postgres=# select gen_newarr(123,array[1,2,3,4]);
-[ RECORD 1 ]-------------------------
gen_newarr | {123_1,123_2,123_3,123_4}
set maintenance_work_mem ='8GB';  
create index idx_tbl_2 on tbl using gin (gen_newarr(gid, c1));
postgres=# explain (analyze,verbose,timing,costs,buffers)  select * from tbl where gen_newarr(gid, c1) @> array['1_1','1_2','1_3'];  
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=12.01..13.27 rows=1 width=65) (actual time=0.146..0.146 rows=0 loops=1)
Output: gid, c1
Recheck Cond: (gen_newarr(tbl.gid, tbl.c1) @> '{1_1,1_2,1_3}'::text[])
Buffers: shared hit=14
-> Bitmap Index Scan on idx_tbl_2 (cost=0.00..12.01 rows=1 width=0) (actual time=0.144..0.144 rows=0 loops=1)
Index Cond: (gen_newarr(tbl.gid, tbl.c1) @> '{1_1,1_2,1_3}'::text[])
Buffers: shared hit=14
Planning time: 0.092 ms
Execution time: 0.174 ms
(9 rows)

postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl where gen_newarr(gid, c1) && array['1_1','1_2','1_3'];
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl (cost=1220.70..133422.08 rows=149251 width=65) (actual time=1.020..6.034 rows=2932 loops=1)
Output: gid, c1
Recheck Cond: (gen_newarr(tbl.gid, tbl.c1) && '{1_1,1_2,1_3}'::text[])
Heap Blocks: exact=2906
Buffers: shared hit=2919
-> Bitmap Index Scan on idx_tbl_2 (cost=0.00..1183.38 rows=149251 width=0) (actual time=0.640..0.640 rows=2932 loops=1)
Index Cond: (gen_newarr(tbl.gid, tbl.c1) && '{1_1,1_2,1_3}'::text[])
Buffers: shared hit=13
Planning time: 0.102 ms
Execution time: 6.348 ms
(10 rows)
select * from tbl where gen_newarr(gid, c1) && array['1_1','1_2','1_3'];  

gid | c1
-----+-----------------------------------------
1 | {62,904,204,618,917,227,388,352,167,1}
1 | {825,126,174,409,340,285,231,942,3,136}
1 | {222,418,799,881,728,582,558,2,368,196}
1 | {847,197,690,1,288,468,179,521,799,196}
1 | {867,316,447,747,953,998,370,360,558,3}
1 | {249,963,669,929,534,945,388,816,1,601}
1 | {925,609,108,981,712,681,906,832,3,275}
1 | {3,354,253,947,588,598,401,89,246,968}
1 | {323,121,22,3,7,714,80,619,178,439}
1 | {866,1,185,704,932,882,496,324,264,882}
......

Acceleration Method for a Compound Query of a Single-Value Column and Full-Text Search

create table tbl123(gid int, ts tsvector);  

insert into tbl123 select random()*99, array_to_tsvector(gen_randarr(999, 10)::text[]) from generate_series(1,10000000);
create index idx_tbl123_1 on tbl123 using gin ( array_to_tsvector(gen_newarr(gid, tsvector_to_array(ts))) );
explain (analyze,verbose,timing,costs,buffers) select * from tbl123 where array_to_tsvector(gen_newarr(gid, tsvector_to_array(ts))) @@ tsquery '1_1 & 1_2 & 1_3';


explain (analyze,verbose,timing,costs,buffers) select * from tbl123 where array_to_tsvector(gen_newarr(gid, tsvector_to_array(ts))) @@ tsquery '1_1 | 1_2 | 1_3';


postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl123 where array_to_tsvector(gen_newarr(gid, tsvector_to_array(ts))) @@ tsquery '1_1 & 1_2 & 1_3';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl123 (cost=12.01..13.28 rows=1 width=77) (actual time=0.207..0.207 rows=0 loops=1)
Output: gid, ts
Recheck Cond: (array_to_tsvector(gen_newarr(tbl123.gid, tsvector_to_array(tbl123.ts))) @@ '''1_1'' & ''1_2'' & ''1_3'''::tsquery)
Buffers: shared hit=14
-> Bitmap Index Scan on idx_tbl123_1 (cost=0.00..12.01 rows=1 width=0) (actual time=0.204..0.204 rows=0 loops=1)
Index Cond: (array_to_tsvector(gen_newarr(tbl123.gid, tsvector_to_array(tbl123.ts))) @@ '''1_1'' & ''1_2'' & ''1_3'''::tsquery)
Buffers: shared hit=14
Planning time: 0.080 ms
Execution time: 0.238 ms
(9 rows)

postgres=# explain (analyze,verbose,timing,costs,buffers) select * from tbl123 where array_to_tsvector(gen_newarr(gid, tsvector_to_array(ts))) @@ tsquery '1_1 | 1_2 | 1_3';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------
Bitmap Heap Scan on public.tbl123 (cost=1220.70..136709.34 rows=149251 width=77) (actual time=0.971..5.988 rows=2970 loops=1)
Output: gid, ts
Recheck Cond: (array_to_tsvector(gen_newarr(tbl123.gid, tsvector_to_array(tbl123.ts))) @@ '''1_1'' | ''1_2'' | ''1_3'''::tsquery)
Heap Blocks: exact=2937
Buffers: shared hit=2950
-> Bitmap Index Scan on idx_tbl123_1 (cost=0.00..1183.38 rows=149251 width=0) (actual time=0.612..0.612 rows=2970 loops=1)
Index Cond: (array_to_tsvector(gen_newarr(tbl123.gid, tsvector_to_array(tbl123.ts))) @@ '''1_1'' | ''1_2'' | ''1_3'''::tsquery)
Buffers: shared hit=13
Planning time: 0.029 ms
Execution time: 6.284 ms
(10 rows)

postgres=# select * from tbl123 where array_to_tsvector(gen_newarr(gid, tsvector_to_array(ts))) @@ tsquery '1_1 | 1_2 | 1_3';
gid | ts
-----+-----------------------------------------------------------
1 | '180' '219' '253' '262' '282' '3' '633' '657' '807' '809'
1 | '1' '166' '261' '670' '807' '860' '897' '922' '93' '964'
1 | '1' '174' '211' '319' '322' '532' '84' '849' '869' '993'
......

Conclusion

Original Source:

Follow me to keep abreast with the latest technology news, industry insights, and developer trends.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store