Optimizing Real-time Tagging on PostgreSQL

where col1=? and col2=? and col3<>? or col4=?;
where col3=1 or col100=?Creating indexes for col3, col100?

Method 1: GIN Composite Indexes

+---------------------------------------------+|100000000001000000010000000000000111100000000| bitmap 1|000001000001000100010000000001000010000000010| bitmap 2&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&|000000000001000000010000000000000010000000000| Combined bitmap+-----------+-------+--------------+----------+|  |  |v  v  vUsed to scan the heap only for matching pages:+---------------------------------------------+|___________X_______X______________X__________|+---------------------------------------------+

Optimization Tricks

PostgreSQL Parallel Computing

Method 2: Row-level Full-text Search

Method 3: Bloom Filter

Method 4: Arrays

Multi-Field, Random Criteria Combinations, Tagging People in Milliseconds

create table test(uid int8 primary key, tag1 int, tag2 text, tag3 int, tag4 text, tag5 timestamp, tag6 date, ...);is converted tocreate table test(uid int8 primary key, tag text[]);
1, 1, 'man', 1023, 'football', '2017-01-01 10:00:00', '1989-09-01'is converted to1, array['tag1:1', 'tag2:man', 'tag3:1023', 'tag4:football', 'tag5:...', tag6:...']
where tag1=? and tag2=? or tag3=?
where arraycol @> array[tag1:?, tag2:?] or arraycol && [tag3:?]

Method 5: Bitmap

Method 6: Independent Indexes

postgres=# create table test(id serial8 primary key,c1 int, c2 int, c3 int, c4 int, c5 int,c6 int, c7 int, c8 int, c9 int, c10 int,c11 int, c12 int, c13 int, c14 int, c15 int,c16 int, c17 int, c18 int, c19 int, c20 int,c21 int, c22 int, c23 int, c24 int, c25 int,c26 int, c27 int, c28 int, c29 int, c30 int);create index idx_test_1 on test(c1);create index idx_test_2 on test(c2);create index idx_test_3 on test(c3);create index idx_test_4 on test(c4);create index idx_test_5 on test(c5);create index idx_test_6 on test(c6);create index idx_test_7 on test(c7);create index idx_test_8 on test(c8);create index idx_test_9 on test(c9);create index idx_test_10 on test(c10);create index idx_test_11 on test(c11);create index idx_test_12 on test(c12);create index idx_test_13 on test(c13);create index idx_test_14 on test(c14);create index idx_test_15 on test(c15);create index idx_test_16 on test(c16);create index idx_test_17 on test(c17);create index idx_test_18 on test(c18);create index idx_test_19 on test(c19);create index idx_test_20 on test(c20);create index idx_test_21 on test(c21);create index idx_test_22 on test(c22);create index idx_test_23 on test(c23);create index idx_test_24 on test(c24);create index idx_test_25 on test(c25);create index idx_test_26 on test(c26);create index idx_test_27 on test(c27);create index idx_test_28 on test(c28);create index idx_test_29 on test(c29);create index idx_test_30 on test(c30);postgres=# alter sequence test_id_seq cache 10000;Write 100 million test data recordsvi ins.sqlinsert into test (c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30) select random()*1000,random()*2000,random()*3000,random()*4000,random()*5000,random()*6000,random()*7000,random()*8000,random()*9000,random()*10000,random()*10000,random()*20000,random()*30000,random()*40000,random()*50000,random()*60000,random()*70000,random()*80000,random()*90000,random()*100000,random()*100000,random()*200000,random()*300000,random()*400000,random()*500000,random()*600000,random()*700000,random()*800000,random()*900000,random()*1000000 from generate_series(1,1000);pgbench -M prepared -n -r -P 1 -f ./ins.sql -c 50 -j 50 -t 2000postgres=# select count(*) from test;count-----------100000000(1 row)While testing queries of random field combinations, the query conditions were merged internally using bitmapAnd/bitmapOr, delivering outstanding performance.postgres=# explain (analyze,verbose,timing,costs,buffers) select * from test where c1=1 and c2=1 and c3=1 or (c10=1 and c11=1 or c12=1) and c14 between 1 and 1000000;QUERY PLAN----------------------------------------------------------------------------------------------------------------------------------------------------------------------------Bitmap Heap Scan on public.test  (cost=1238.80..8607.84 rows=4887 width=128) (actual time=21.869..30.420 rows=4906 loops=1)Output: id, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30Recheck Cond: (((test.c3 = 1) AND (test.c2 = 1)) OR (((test.c10 = 1) AND (test.c11 = 1)) OR (test.c12 = 1)))Filter: (((test.c1 = 1) AND (test.c2 = 1) AND (test.c3 = 1)) OR ((((test.c10 = 1) AND (test.c11 = 1)) OR (test.c12 = 1)) AND (test.c14 >= 1) AND (test.c14 <= 1000000)))Rows Removed by Filter: 16Heap Blocks: exact=4915Buffers: shared hit=5230->  BitmapOr  (cost=1238.80..1238.80 rows=4903 width=0) (actual time=20.931..20.931 rows=0 loops=1)Buffers: shared hit=315->  BitmapAnd  (cost=947.23..947.23 rows=16 width=0) (actual time=17.602..17.602 rows=0 loops=1)Buffers: shared hit=235->  Bitmap Index Scan on idx_test_3  (cost=0.00..379.09 rows=32470 width=0) (actual time=7.965..7.965 rows=33036 loops=1)Index Cond: (test.c3 = 1)Buffers: shared hit=94->  Bitmap Index Scan on idx_test_2  (cost=0.00..565.45 rows=48517 width=0) (actual time=7.826..7.826 rows=50054 loops=1)Index Cond: (test.c2 = 1)Buffers: shared hit=141->  BitmapOr  (cost=291.32..291.32 rows=4887 width=0) (actual time=3.076..3.076 rows=0 loops=1)Buffers: shared hit=80->  BitmapAnd  (cost=231.88..231.88 rows=1 width=0) (actual time=2.769..2.769 rows=0 loops=1)Buffers: shared hit=62->  Bitmap Index Scan on idx_test_10  (cost=0.00..114.46 rows=9786 width=0) (actual time=1.104..1.104 rows=10085 loops=1)Index Cond: (test.c10 = 1)Buffers: shared hit=31->  Bitmap Index Scan on idx_test_11  (cost=0.00..114.72 rows=9821 width=0) (actual time=1.178..1.178 rows=9883 loops=1)Index Cond: (test.c11 = 1)Buffers: shared hit=31->  Bitmap Index Scan on idx_test_12  (cost=0.00..58.22 rows=4887 width=0) (actual time=0.307..0.307 rows=4904 loops=1)Index Cond: (test.c12 = 1)Buffers: shared hit=18Planning time: 0.460 msExecution time: 31.546 ms(32 rows)
create table tbl (  -- Master table...);create table tbl_0 (  -- Partition tablecrop_id int,  -- Partition mod(corp_id, 128)=0....);....alter table tbl_0 inherit tbl;  -- Sets the table inheritance relationship

Method 7: Array Deletion Method — JOIN Optimization

create table a(corp_id int, uid int, ....);create table b(uid int, ....);select b.* from a join b on (a.uid=b.uid and a.corp_id=? and b.xxx=xxx);
-- Step 1select * from a where corp_id=?-- Step 2select * from b where b.xxx=xxx and b.uid in (.......)
create table a(corp_id int, uid int, ....);create table b(uid int, corp_id int[], ....);  -- Adds the crop_id int[] array field, and maintains relationship between users and corporations-- Create GIN indexes on arrayscreate idx on b using gin (crop_id);-- Use the array intersection method to search for users that meet certain conditions within a corporationselect * from b where corp_id && array[?] and xxx=xxx;

Summary

Recommendations

--

--

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store