Similarity Analysis for PostgreSQL Text Databases

  • sorting by image or facial similarity
  • sorting by overlapping hobbies
  • sorting by age approximation
  • sorting by distance
  • sorting by textual similarity
  • sorting by word-break similarities

Sorting by Image or Facial Similarity

Sorting by Overlapping Hobbies

Sorting by Age Approximation

postgres=# create extension btree_gist;
CREATE EXTENSION
postgres=# create table test12(id int);
CREATE TABLE
postgres=# insert into test12 select trunc(random()*1000) from generate_series(1,100000);
INSERT 0 100000
postgres=# create index idx_test12 on test12 using gist(id);
CREATE INDEX
postgres=# select * from test12 order by id <-> 100 limit 10;
id
-----
100
100
100
100
100
100
100
100
100
100
(10 rows)

Sorting by Distance

postgres=# create table test13(c1 point);
CREATE TABLE
postgres=# insert into test13 select ('('||trunc(random()*1000)||','||trunc(random()*5000)||')')::point from generate_series(1,10000);
INSERT 0 10000
postgres=# create index idx_test13 on test13 using gist(c1);
CREATE INDEX
postgres=# select * from test13 order by c1 <-> point '(1,10000)' limit 10;
c1
------------
(58,4993)
(191,4995)
(48,4991)
(326,4998)
(99,4988)
(205,4991)
(348,4998)
(53,4986)
(174,4988)
(136,4984)
(10 rows)

Sorting by Textual Similarity

postgres=# create extension pg_trgm;
CREATE EXTENSION
postgres=# create table test14(c1 text);
CREATE TABLE
postgres=# insert into test14 values ('Hello Digoal'), ('China'), ('Hello China'), ('Nihao Digoal');
INSERT 0 4
postgres=# select * from test14;
c1
--------------
Hello Digoal
China
Hello China
Nihao Digoal
(4 rows)
postgres=# create index idx_test14 on test14 using gist(c1 gist_trgm_ops);
CREATE INDEX
postgres=# explain select *,c1 <-> 'Digoal' from test14 order by c1 <-> 'Digoal' limit 2;
QUERY PLAN
--------------------------------------------------------------------------------
Limit (cost=0.13..4.17 rows=2 width=36)
-> Index Scan using idx_test14 on test14 (cost=0.13..8.21 rows=4 width=36)
Order By: (c1 <-> 'Digoal'::text)
(3 rows)
postgres=# select *,c1 <-> 'Digoal' from test14 order by c1 <-> 'Digoal' limit 2;
c1 | ?column?
--------------+----------
Hello Digoal | 0.461538
Nihao Digoal | 0.461538
(2 rows)

Sorting by Word-Break Similarity

export PATH=/home/digoal/pgsql9.6/bin:$PATH
git clone https://github.com/postgrespro/rum
cd rum
make USE_PGXS=1
make USE_PGXS=1 install
//
//
git clone https://github.com/jaiminpan/pg_jieba
cd pg_jieba
make USE_PGXS=1
make USE_PGXS=1 install
//
//
postgres=# create extension rum;
CREATE EXTENSION
postgres=# create extension pg_jieba;
CREATE EXTENSION
// Word Example examples:
postgres=# select * from to_tsvector('jiebacfg', 'Xiao Ming graduated with a master's degree from the Institute of Computing Technology of the Chinese Academy of Sciences, and later went on to study at Kyoto University in Japan.');
to_tsvector
----------------------------------------------------------------------------------
'Chinese Academy of Sciences':5 'Xiao Ming':1 'Kyoto University':10 'graduated':3 'study':11 'master's degree':2 'Institute of Computing Technology':6
(1 row)
// With similarities
postgres=# select * from rum_ts_distance(to_tsvector('jiebacfg', 'Xiao Ming graduated with a master's degree from the Institute of Computing Technology of the Chinese Academy of Sciences, and later went on to study at Kyoto University in Japan.') , to_tsquery(''Institute of Computing Technology'));
rum_ts_distance
-----------------
16.4493
(1 row)
// Without similarities
postgres=# select * from rum_ts_distance(to_tsvector('jiebacfg', 'Xiao Ming graduated with a master's degree from the Institute of Computing Technology of the Chinese Academy of Sciences, and later went on to study at Kyoto University in Japan.) , to_tsquery('Computer Science'));
rum_ts_distance
-----------------
Infinity
(1 row)
// One or the other has similarities
postgres=# select * from rum_ts_distance(to_tsvector('jiebacfg', 'Xiao Ming graduated with a master's degree from the Institute of Computing Technology of the Chinese Academy of Sciences, and later went on to study at Kyoto University in Japan.') , to_tsquery('''Institute of Computing Technology | master's degree'));
rum_ts_distance
-----------------
8.22467
(1 row)
// Both have similarities
postgres=# select * from rum_ts_distance(to_tsvector('jiebacfg', 'Xiao Ming graduated with a master's degree from the Institute of Computing Technology of the Chinese Academy of Sciences, and later went on to study at Kyoto University in Japan.') , to_tsquery('''Institute of Computing Technology & master's degree'));
rum_ts_distance
-----------------
32.8987
(1 row)
// Order
postgres=# create table test15(c1 tsvector);
CREATE TABLE
postgres=# insert into test15 values (to_tsvector('jiebacfg', 'hello china, i''m digoal')), (to_tsvector('jiebacfg', 'hello world, i''m postgresql')), (to_tsvector('jiebacfg', 'how are you, i''m digoal'));
INSERT 0 3
postgres=# select * from test15;
c1
-----------------------------------------------------
' ':2,5,9 'china':3 'digoal':10 'hello':1 'm':8
' ':2,5,9 'hello':1 'm':8 'postgresql':10 'world':3
' ':2,4,7,11 'digoal':12 'm':10
(3 rows)
postgres=# create index idx_test15 on test15 using rum(c1 rum_tsvector_ops);
CREATE INDEX
postgres=# select *,c1 <=> to_tsquery('hello') from test15;
c1 | ?column?
-----------------------------------------------------+----------
' ':2,5,9 'china':3 'digoal':10 'hello':1 'm':8 | 16.4493
' ':2,5,9 'hello':1 'm':8 'postgresql':10 'world':3 | 16.4493
' ':2,4,7,11 'digoal':12 'm':10 | Infinity
(3 rows)
postgres=# explain select *,c1 <=> to_tsquery('postgresql') from test15 order by c1 <=> to_tsquery('postgresql');
QUERY PLAN
--------------------------------------------------------------------------------
Index Scan using idx_test15 on test15 (cost=3600.25..3609.06 rows=3 width=36)
Order By: (c1 <=> to_tsquery('postgresql'::text))
(2 rows)

Original Source

--

--

--

Follow me to keep abreast with the latest technology news, industry insights, and developer trends. Alibaba Cloud website:https://www.alibabacloud.com

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

Migrate Data from SQL to Cosmos DB

Tasker and Telegram integration: live location

Pandas in Python

Tips for a successful Hackathon

How I could access your internal servers, steal and modify your image repository

Low Codes- Accelerating the Performance of Organizations as the World Gets Hit With COVID 19…

Ephemeral Pods in Kubernetes

Network Structure

Pulumi. VPC and Fargate configuration

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Alibaba Cloud

Alibaba Cloud

Follow me to keep abreast with the latest technology news, industry insights, and developer trends. Alibaba Cloud website:https://www.alibabacloud.com

More from Medium

Building A Structured Streaming Data Pipeline

Redis — the optimal solution for high-performance data retrieval

Title pic

Streaming NFT Analytics with Apache NiFi and Apache Pulsar (FLiPN Stack)

Produce and consume messages from a Kafka topic using docker