# Technical Solution

## Solution 1

`KEY: 用户ID标签1: 标签2: ...标签N:`
`每个标签字段一个索引`
`and , or , not 组合where 标签a and 标签b and ...`

# Solution 2

`KEY：用户IDVALUES：标签数组`
`标签数组字段: GIN倒排索引`
`与、或、非where VALUES @> array[标签s] -- 与where VALUES && array[标签s] -- 或where not VALUES @> array[标签s] -- 非`

## Solution 3

`KEY：标签IDVALUES: 用户bitmap`
`标签ID字段: Btree索引`
`聚合bitmap: 与、或、非and_agg(bitmaps) where KEY in (标签s) -- 与or_agg(bitmaps) where KEY in (标签s) -- 或except(bitmap1,bitmap2) -- 非`

# Demo

## Solution 1 Demo

`create table t_tag_dict (tag int primary key,   -- 标签(人群)idinfo text,  -- 人群描述crt_time timestamp  -- 时间);`
`insert into t_tag_dict values (1, '男', now());insert into t_tag_dict values (2, '女', now());insert into t_tag_dict values (3, '大于24岁', now());-- ...insert into t_tag_dict select generate_series(4,100000), md5(random()::text), clock_timestamp();`
`create table t_user_tag (uid int8,   -- 用户idtag int,            -- 用户对应标签（人群）mod_time timestamp,     -- 时间primary key (tag,uid));`
`create or replace function gen_rand_tag(int,int) returns setof int as\$\$  select case when random() > 0.5 then 1::int else 2::int end as tag  union all  select ceil(random()*\$1)::int as tag from generate_series(1,\$2);\$\$ language sql strict volatile;insert into t_user_tagselect uid, gen_rand_tag(100000,63) as tag, clock_timestamp() from generate_series(1,20000000) as uid on conflict (uid,tag) do nothing;-- 或使用如下方法加速导入create sequence seq;vi test.sqlinsert into t_user_tagselect uid, gen_rand_tag(100000,63) as tag, clock_timestamp() from nextval('seq'::regclass) as uid on conflict(tag,uid) do nothing;pgbench -M prepared -n -r -P 1 -f ./test.sql -c 50 -j 50 -t 400000`
`1、人群数量select count(*) from (select uid from t_user_tag where tag=1 intersectselect uid from t_user_tag where tag=3) t;-- Time: 1494.789 ms (00:01.495)2、提取人群IDselect uid from t_user_tag where tag=1 intersectselect uid from t_user_tag where tag=3;-- Time: 3246.184 ms (00:03.246)`
`1、人群数量select count(*) from (select uid from t_user_tag where tag=1 unionselect uid from t_user_tag where tag=3unionselect uid from t_user_tag where tag=10unionselect uid from t_user_tag where tag=200) t;-- Time: 3577.714 ms (00:03.578)2、提取人群IDselect uid from t_user_tag where tag=1 unionselect uid from t_user_tag where tag=3unionselect uid from t_user_tag where tag=10unionselect uid from t_user_tag where tag=200;-- Time: 5682.458 ms (00:05.682)`
`public | t_user_tag         | table | postgres | 62 GB   |  public | t_user_tag_pkey    | index | postgres | t_user_tag    | 61 GB  |`

## Solution 2 Demo

`create table t_tag_dict (tag int primary key,   -- 标签(人群)idinfo text,  -- 人群描述crt_time timestamp  -- 时间);`
`insert into t_tag_dict values (1, '男', now());insert into t_tag_dict values (2, '女', now());insert into t_tag_dict values (3, '大于24岁', now());-- ...insert into t_tag_dict select generate_series(4,100000), md5(random()::text), clock_timestamp();`
`create table t_user_tags (uid int8 primary key,   -- 用户idtags int[],            -- 用户标签（人群）数组mod_time timestamp     -- 时间);`
`create or replace function gen_rand_tags(int,int) returns int[] as \$\$  select array_agg(ceil(random()*\$1)::int) from generate_series(1,\$2);\$\$ language sql strict;`
`select gen_rand_tags(100000, 8);                   gen_rand_tags                   --------------------------------------------------- {43494,46038,74102,25308,99129,40893,33653,29690}(1 row)`
`insert into t_user_tags select generate_series(1,10000000), array_append(gen_rand_tags(100000, 63),1), now();insert into t_user_tags select generate_series(10000001,20000000), array_append(gen_rand_tags(100000, 63),2), now();`
`create index idx_t_user_tags_1 on t_user_tags using gin (tags);`
`1、人群数量select count(uid) from t_user_tags where tags @> array[1,3];2、提取人群IDselect uid from t_user_tags where tags @> array[1,3];`
`1、人群数量select count(uid) from t_user_tags where tags && array[1,3,10,200];2、提取人群IDselect uid from t_user_tags where tags && array[1,3,10,200];`

## Solution 3 Demo

`安装插件 – create extension roaringbitmap;bitmap输出格式 – set roaringbitmap.output_format='bytea|array';bitmap取值范围 – 40亿（int4） 构造bitmap –  rb_build(int4[]) bitmap转换为数组或多条记录 - rb_to_array(rb) – rb_iterate(rb) bitmap内包含对象个数 – rb_cardinality(rb) 逻辑运算: 与、或、异或、差SELECT roaringbitmap('{1,2,3}') | roaringbitmap('{3,4,5}');SELECT roaringbitmap('{1,2,3}') & roaringbitmap('{3,4,5}');SELECT roaringbitmap('{1,2,3}') # roaringbitmap('{3,4,5}');SELECT roaringbitmap('{1,2,3}') - roaringbitmap('{3,4,5}');聚合运算: build rb、与、或、异或SELECT rb_build_agg(e) FROM generate_series(1,100) e;SELECT rb_or_agg(bitmap) FROM t1;SELECT rb_and_agg(bitmap) FROM t1;SELECT rb_xor_agg(bitmap) FROM t1;聚合并统计对象数(与、或、异或)rb_or_cardinality_aggrb_and_cardinality_aggrb_xor_cardinality_agg逻辑判断: 包含、相交、相等、不相等Opperator   Input   Output  Desc    Example Result@>  roaringbitmap,roaringbitmap bool    contains    roaringbitmap('{1,2,3}') @> roaringbitmap('{3,4,5}')    f@>  roaringbitmap,integer   bool    contains    roaringbitmap('{1,2,3,4,5}') @> 3   t<@  roaringbitmap,roaringbitmap bool    is contained by roaringbitmap('{1,2,3}')    f<@  integer,roaringbitmap   bool    is contained by 3   t&&  roaringbitmap,roaringbitmap bool    overlap (have elements in common)   roaringbitmap('{1,2,3}') && roaringbitmap('{3,4,5}')    t=   roaringbitmap,roaringbitmap bool    equal   roaringbitmap('{1,2,3}') = roaringbitmap('{3,4,5}') f<>  roaringbitmap,roaringbitmap bool    not equal   roaringbitmap('{1,2,3}') <> roaringbitmap('{3,4,5}')    t`
`create extension roaringbitmap;`
`create table t_tag_users (  tagid int primary key,   -- 用户标签（人群）id   uid_offset int,          -- 由于userid是int8类型，roaringbitmap内部使用int4存储，需要转换一下。     userbits roaringbitmap,     -- 用户id聚合的 bitmap    mod_time timestamp       -- 时间 );`
`insert into t_tag_users select tagid, uid_offset, rb_build_agg(uid::int) as userbits from (select   unnest(tags) as tagid,   (uid / (2^31)::int8) as uid_offset,   mod(uid, (2^31)::int8) as uid from t_user_tags ) t group by tagid, uid_offset;`
`1、人群数量select sum(ub) from (select uid_offset,rb_and_cardinality_agg(userbits) as ub from t_tag_users where tagid in (1,3) group by uid_offset) t;2、提取人群IDselect uid_offset,rb_and_agg(userbits) as ub from t_tag_users where tagid in (1,3) group by uid_offset;`
`1、人群数量select sum(ub) from (select uid_offset,rb_or_cardinality_agg(userbits) as ub from t_tag_users where tagid in (1,3,10,200) group by uid_offset) t;2、提取人群IDselect uid_offset,rb_or_agg(userbits) as ub from t_tag_users where tagid in (1,3,10,200) group by uid_offset;`

# Original Source:

Follow me to keep abreast with the latest technology news, industry insights, and developer trends.

## More from Alibaba Cloud

Follow me to keep abreast with the latest technology news, industry insights, and developer trends.