/*
create table eg_event_artists
(
id bigint primary key,
artist_id varchar(10)
);
copy eg_event_artists from 'c://tmp//xx.csv' with csv delimiter ';' header ;
-- xx.csv zawierało 100 tys losowych 5-cioznakowych stringów
*/
with a(aid) as(values
('cf6fa'),('de0e8'),('c7373'),('a5794'),('ce7e6'),('d5795')
)
select a.aid, e.* from a left join eg_event_artists e on a.aid=e.artist_id
where e.artist_id is null
Bez WHERE czas 32 ms. Z WHERE ok.50 ms
Znajduje dwa NULLE
EXPLAIN VERBOSE:
Hash Anti Join (cost=3280.07..3771.39 rows=1 width=46)
Output: a.aid, e.id, e.artist_id
Hash Cond: (a.aid = (e.artist_id)::text)
CTE a
-> Values Scan on "*VALUES*" (cost=0.00..0.08 rows=6 width=32)
Output: "*VALUES*".column1
-> CTE Scan on a (cost=0.00..0.12 rows=6 width=32)
Output: a.aid
-> Hash (cost=1541.00..1541.00 rows=100000 width=14)
Output: e.id, e.artist_id
-> Seq Scan on public.eg_event_artists e (cost=0.00..1541.00 rows=100000 width=14)
Output: e.id, e.artist_id
po założeniu indeksu:
create index on eg_event_artists(artist_id)
Statystyka:
Nested Loop Anti Join (cost=0.37..46.13 rows=1 width=46)
Output: a.aid, e.id, e.artist_id
CTE a
-> Values Scan on "*VALUES*" (cost=0.00..0.08 rows=6 width=32)
Output: "*VALUES*".column1
-> CTE Scan on a (cost=0.00..0.12 rows=6 width=32)
Output: a.aid
-> Index Scan using eg_event_artists_artist_id_idx on public.eg_event_artists e (cost=0.29..8.31 rows=1 width=14)
Output: e.id, e.artist_id
Index Cond: (a.aid = (e.artist_id)::text)