PostgreSQL: Case insensitive string comparison

后端 未结 6 1809
囚心锁ツ
囚心锁ツ 2020-12-04 16:34

Is there a simple ignore-case-comparison for PostgreSQL?

I want to replace:

SELECT id, user_name 
    FROM users 
        WHERE lower(email) IN (lowe         


        
相关标签:
6条回答
  • 2020-12-04 16:51

    You can also create an index on lower(email).

    0 讨论(0)
  • 2020-12-04 16:56
    Use ‘Collate SQL_Latin1_General_CP1_CS_AS’ for it.
    -- NOTE(review): this snippet is T-SQL (SQL Server) syntax — DECLARE @var, PRINT and this
    -- collation name are not PostgreSQL constructs, so it does not answer the PostgreSQL question as-is.
    declare @a nvarchar(5)='a'
    declare @b nvarchar(5)='A'
    
    -- NOTE(review): the "CS" in the collation name presumably means case-sensitive, in which case
    -- 'a' and 'A' compare unequal and the ELSE branch runs — confirm against the SQL Server collation docs.
    if(@a=@b Collate SQL_Latin1_General_CP1_CS_AS)
    begin
    print 'Match'
    end
    else
    begin
    print 'Not Matched'
    end
    
    0 讨论(0)
  • 2020-12-04 16:57

    Use case-insensitive text data type. Use citext:

    -- Example table whose email column uses the case-insensitive citext type.
    create table emails
    (
    user_id int references users(user_id),
    email citext
    );
    
    insert into emails(user_id, email) values(1, 'linus.Torvalds@linUX.com');
    insert into emails(user_id, email) values(2, 'iSteve.jobs@apple.com');
    
    -- The literals differ from the stored values only in letter case;
    -- with a citext column both rows are expected to match.
    select user_id, email from emails where email in ('linus.torvalds@Linux.com','isteve.jobs@Apple.com');
    

    In case you cannot find the citext.sql in your contrib directory, copy and paste this in your pgAdmin:

    /* $PostgreSQL: pgsql/contrib/citext/citext.sql.in,v 1.3 2008/09/05 18:25:16 tgl Exp $ */
    
    -- Adjust this setting to control where the objects get created.
    SET search_path = public;
    
    --
    --  PostgreSQL code for CITEXT.
    --
    -- Most I/O functions, and a few others, piggyback on the "text" type
    -- functions via the implicit cast to text.
    --
    
    --
    -- Shell type to keep things a bit quieter.
    --
    
    CREATE TYPE citext;
    
    --
    --  Input and output functions.
    --
    -- Input function for citext; delegates to the internal function 'textin'.
    CREATE OR REPLACE FUNCTION citextin(cstring) RETURNS citext
        LANGUAGE internal
        IMMUTABLE STRICT
        AS 'textin';
    
    -- Output function for citext; delegates to the internal function 'textout'.
    CREATE OR REPLACE FUNCTION citextout(citext) RETURNS cstring
        LANGUAGE internal
        IMMUTABLE STRICT
        AS 'textout';
    
    -- Receive function for citext; delegates to the internal function 'textrecv'.
    CREATE OR REPLACE FUNCTION citextrecv(internal) RETURNS citext
        LANGUAGE internal
        STABLE STRICT
        AS 'textrecv';
    
    -- Send function for citext; delegates to the internal function 'textsend'.
    CREATE OR REPLACE FUNCTION citextsend(citext) RETURNS bytea
        LANGUAGE internal
        STABLE STRICT
        AS 'textsend';
    
    --
    --  The type itself.
    --
    
    CREATE TYPE citext (
        INPUT          = citextin,
        OUTPUT         = citextout,
        RECEIVE        = citextrecv,
        SEND           = citextsend,
        INTERNALLENGTH = VARIABLE,
        STORAGE        = extended,
        -- make it a non-preferred member of string type category
        CATEGORY       = 'S',
        PREFERRED      = false
    );
    
    --
    -- Type casting functions for those situations where the I/O casts don't
    -- automatically kick in.
    --
    
    CREATE OR REPLACE FUNCTION citext(bpchar)
    RETURNS citext
    AS 'rtrim1'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext(boolean)
    RETURNS citext
    AS 'booltext'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext(inet)
    RETURNS citext
    AS 'network_show'
    LANGUAGE internal IMMUTABLE STRICT;
    
    --
    --  Implicit and assignment type casts.
    --
    
    CREATE CAST (citext AS text)    WITHOUT FUNCTION AS IMPLICIT;
    CREATE CAST (citext AS varchar) WITHOUT FUNCTION AS IMPLICIT;
    CREATE CAST (citext AS bpchar)  WITHOUT FUNCTION AS ASSIGNMENT;
    CREATE CAST (text AS citext)    WITHOUT FUNCTION AS ASSIGNMENT;
    CREATE CAST (varchar AS citext) WITHOUT FUNCTION AS ASSIGNMENT;
    CREATE CAST (bpchar AS citext)  WITH FUNCTION citext(bpchar)  AS ASSIGNMENT;
    CREATE CAST (boolean AS citext) WITH FUNCTION citext(boolean) AS ASSIGNMENT;
    CREATE CAST (inet AS citext)    WITH FUNCTION citext(inet)    AS ASSIGNMENT;
    
    --
    -- Operator Functions.
    --
    
    CREATE OR REPLACE FUNCTION citext_eq( citext, citext )
    RETURNS bool
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext_ne( citext, citext )
    RETURNS bool
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext_lt( citext, citext )
    RETURNS bool
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext_le( citext, citext )
    RETURNS bool
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext_gt( citext, citext )
    RETURNS bool
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION citext_ge( citext, citext )
    RETURNS bool
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    --
    -- Operators.
    --
    
    CREATE OPERATOR = (
        LEFTARG    = CITEXT,
        RIGHTARG   = CITEXT,
        COMMUTATOR = =,
        NEGATOR    = <>,
        PROCEDURE  = citext_eq,
        RESTRICT   = eqsel,
        JOIN       = eqjoinsel,
        HASHES,
        MERGES
    );
    
    CREATE OPERATOR <> (
        LEFTARG    = CITEXT,
        RIGHTARG   = CITEXT,
        NEGATOR    = =,
        COMMUTATOR = <>,
        PROCEDURE  = citext_ne,
        RESTRICT   = neqsel,
        JOIN       = neqjoinsel
    );
    
    CREATE OPERATOR < (
        LEFTARG    = CITEXT,
        RIGHTARG   = CITEXT,
        NEGATOR    = >=,
        COMMUTATOR = >,
        PROCEDURE  = citext_lt,
        RESTRICT   = scalarltsel,
        JOIN       = scalarltjoinsel
    );
    
    CREATE OPERATOR <= (
        LEFTARG    = CITEXT,
        RIGHTARG   = CITEXT,
        NEGATOR    = >,
        COMMUTATOR = >=,
        PROCEDURE  = citext_le,
        RESTRICT   = scalarltsel,
        JOIN       = scalarltjoinsel
    );
    
    CREATE OPERATOR >= (
        LEFTARG    = CITEXT,
        RIGHTARG   = CITEXT,
        NEGATOR    = <,
        COMMUTATOR = <=,
        PROCEDURE  = citext_ge,
        RESTRICT   = scalargtsel,
        JOIN       = scalargtjoinsel
    );
    
    CREATE OPERATOR > (
        LEFTARG    = CITEXT,
        RIGHTARG   = CITEXT,
        NEGATOR    = <=,
        COMMUTATOR = <,
        PROCEDURE  = citext_gt,
        RESTRICT   = scalargtsel,
        JOIN       = scalargtjoinsel
    );
    
    --
    -- Support functions for indexing.
    --
    
    CREATE OR REPLACE FUNCTION citext_cmp(citext, citext)
    RETURNS int4
    AS '$libdir/citext'
    LANGUAGE C STRICT IMMUTABLE;
    
    CREATE OR REPLACE FUNCTION citext_hash(citext)
    RETURNS int4
    AS '$libdir/citext'
    LANGUAGE C STRICT IMMUTABLE;
    
    --
    -- The btree indexing operator class.
    --
    
    CREATE OPERATOR CLASS citext_ops
    DEFAULT FOR TYPE CITEXT USING btree AS
        OPERATOR    1   <  (citext, citext),
        OPERATOR    2   <= (citext, citext),
        OPERATOR    3   =  (citext, citext),
        OPERATOR    4   >= (citext, citext),
        OPERATOR    5   >  (citext, citext),
        FUNCTION    1   citext_cmp(citext, citext);
    
    --
    -- The hash indexing operator class.
    --
    
    CREATE OPERATOR CLASS citext_ops
    DEFAULT FOR TYPE citext USING hash AS
        OPERATOR    1   =  (citext, citext),
        FUNCTION    1   citext_hash(citext);
    
    --
    -- Aggregates.
    --
    
    -- State-transition function used by the min(citext) aggregate defined below.
    -- Implemented in the '$libdir/citext' shared library.
    -- The language name is written unquoted (LANGUAGE C), matching the other C
    -- functions in this script; the quoted form is kept only for backward compatibility.
    CREATE OR REPLACE FUNCTION citext_smaller(citext, citext)
    RETURNS citext
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    -- State-transition function used by the max(citext) aggregate defined below.
    -- Implemented in the '$libdir/citext' shared library.
    -- The language name is written unquoted (LANGUAGE C), matching the other C
    -- functions in this script; the quoted form is kept only for backward compatibility.
    CREATE OR REPLACE FUNCTION citext_larger(citext, citext)
    RETURNS citext
    AS '$libdir/citext'
    LANGUAGE C IMMUTABLE STRICT;
    
    CREATE AGGREGATE min(citext)  (
        SFUNC = citext_smaller,
        STYPE = citext,
        SORTOP = <
    );
    
    CREATE AGGREGATE max(citext)  (
        SFUNC = citext_larger,
        STYPE = citext,
        SORTOP = >
    );
    
    --
    -- CITEXT pattern matching.
    --
    
    CREATE OR REPLACE FUNCTION texticlike(citext, citext)
    RETURNS bool AS 'texticlike'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION texticnlike(citext, citext)
    RETURNS bool AS 'texticnlike'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION texticregexeq(citext, citext)
    RETURNS bool AS 'texticregexeq'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION texticregexne(citext, citext)
    RETURNS bool AS 'texticregexne'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OPERATOR ~ (
        PROCEDURE = texticregexeq,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = !~,
        RESTRICT  = icregexeqsel,
        JOIN      = icregexeqjoinsel
    );
    
    CREATE OPERATOR ~* (
        PROCEDURE = texticregexeq,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = !~*,
        RESTRICT  = icregexeqsel,
        JOIN      = icregexeqjoinsel
    );
    
    CREATE OPERATOR !~ (
        PROCEDURE = texticregexne,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = ~,
        RESTRICT  = icregexnesel,
        JOIN      = icregexnejoinsel
    );
    
    CREATE OPERATOR !~* (
        PROCEDURE = texticregexne,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = ~*,
        RESTRICT  = icregexnesel,
        JOIN      = icregexnejoinsel
    );
    
    CREATE OPERATOR ~~ (
        PROCEDURE = texticlike,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = !~~,
        RESTRICT  = iclikesel,
        JOIN      = iclikejoinsel
    );
    
    CREATE OPERATOR ~~* (
        PROCEDURE = texticlike,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = !~~*,
        RESTRICT  = iclikesel,
        JOIN      = iclikejoinsel
    );
    
    CREATE OPERATOR !~~ (
        PROCEDURE = texticnlike,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = ~~,
        RESTRICT  = icnlikesel,
        JOIN      = icnlikejoinsel
    );
    
    CREATE OPERATOR !~~* (
        PROCEDURE = texticnlike,
        LEFTARG   = citext,
        RIGHTARG  = citext,
        NEGATOR   = ~~*,
        RESTRICT  = icnlikesel,
        JOIN      = icnlikejoinsel
    );
    
    --
    -- Matching citext to text. 
    --
    
    CREATE OR REPLACE FUNCTION texticlike(citext, text)
    RETURNS bool AS 'texticlike'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION texticnlike(citext, text)
    RETURNS bool AS 'texticnlike'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION texticregexeq(citext, text)
    RETURNS bool AS 'texticregexeq'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OR REPLACE FUNCTION texticregexne(citext, text)
    RETURNS bool AS 'texticregexne'
    LANGUAGE internal IMMUTABLE STRICT;
    
    CREATE OPERATOR ~ (
        PROCEDURE = texticregexeq,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = !~,
        RESTRICT  = icregexeqsel,
        JOIN      = icregexeqjoinsel
    );
    
    CREATE OPERATOR ~* (
        PROCEDURE = texticregexeq,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = !~*,
        RESTRICT  = icregexeqsel,
        JOIN      = icregexeqjoinsel
    );
    
    CREATE OPERATOR !~ (
        PROCEDURE = texticregexne,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = ~,
        RESTRICT  = icregexnesel,
        JOIN      = icregexnejoinsel
    );
    
    CREATE OPERATOR !~* (
        PROCEDURE = texticregexne,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = ~*,
        RESTRICT  = icregexnesel,
        JOIN      = icregexnejoinsel
    );
    
    CREATE OPERATOR ~~ (
        PROCEDURE = texticlike,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = !~~,
        RESTRICT  = iclikesel,
        JOIN      = iclikejoinsel
    );
    
    CREATE OPERATOR ~~* (
        PROCEDURE = texticlike,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = !~~*,
        RESTRICT  = iclikesel,
        JOIN      = iclikejoinsel
    );
    
    CREATE OPERATOR !~~ (
        PROCEDURE = texticnlike,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = ~~,
        RESTRICT  = icnlikesel,
        JOIN      = icnlikejoinsel
    );
    
    CREATE OPERATOR !~~* (
        PROCEDURE = texticnlike,
        LEFTARG   = citext,
        RIGHTARG  = text,
        NEGATOR   = ~~*,
        RESTRICT  = icnlikesel,
        JOIN      = icnlikejoinsel
    );
    
    --
    -- Matching citext in string comparison functions.
    -- XXX TODO Ideally these would be implemented in C.
    --
    
    -- regexp_matches for citext arguments: both are cast to text and the
    -- built-in pg_catalog.regexp_matches is called with the 'i' flag.
    CREATE OR REPLACE FUNCTION regexp_matches( citext, citext )
    RETURNS TEXT[]
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_matches(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            'i'
        );
    $$;
    
    -- regexp_matches variant with caller-supplied flags ($3): 'i' is appended
    -- to the flags unless they contain the character 'c'.
    CREATE OR REPLACE FUNCTION regexp_matches( citext, citext, text )
    RETURNS TEXT[]
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_matches(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            CASE
                WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i'
                ELSE $3
            END
        );
    $$;
    
    -- regexp_replace for a citext source and pattern: both are cast to text and
    -- the built-in pg_catalog.regexp_replace is called with the 'i' flag.
    CREATE OR REPLACE FUNCTION regexp_replace( citext, citext, text )
    RETURNS TEXT
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_replace(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            $3,
            'i'
        );
    $$;
    
    -- regexp_replace variant with caller-supplied flags ($4): 'i' is appended
    -- to the flags unless they contain the character 'c'.
    CREATE OR REPLACE FUNCTION regexp_replace( citext, citext, text, text )
    RETURNS TEXT
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_replace(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            $3,
            CASE
                WHEN pg_catalog.strpos($4, 'c') = 0 THEN $4 || 'i'
                ELSE $4
            END
        );
    $$;
    
    -- regexp_split_to_array for citext arguments: both are cast to text and the
    -- built-in pg_catalog.regexp_split_to_array is called with the 'i' flag.
    CREATE OR REPLACE FUNCTION regexp_split_to_array( citext, citext )
    RETURNS TEXT[]
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_split_to_array(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            'i'
        );
    $$;
    
    -- regexp_split_to_array variant with caller-supplied flags ($3): 'i' is
    -- appended to the flags unless they contain the character 'c'.
    CREATE OR REPLACE FUNCTION regexp_split_to_array( citext, citext, text )
    RETURNS TEXT[]
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_split_to_array(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            CASE
                WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i'
                ELSE $3
            END
        );
    $$;
    
    -- regexp_split_to_table for citext arguments: both are cast to text and the
    -- built-in pg_catalog.regexp_split_to_table is called with the 'i' flag.
    CREATE OR REPLACE FUNCTION regexp_split_to_table( citext, citext )
    RETURNS SETOF TEXT
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_split_to_table(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            'i'
        );
    $$;
    
    -- regexp_split_to_table variant with caller-supplied flags ($3): 'i' is
    -- appended to the flags unless they contain the character 'c'.
    CREATE OR REPLACE FUNCTION regexp_split_to_table( citext, citext, text )
    RETURNS SETOF TEXT
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.regexp_split_to_table(
            CAST($1 AS pg_catalog.text),
            CAST($2 AS pg_catalog.text),
            CASE
                WHEN pg_catalog.strpos($3, 'c') = 0 THEN $3 || 'i'
                ELSE $3
            END
        );
    $$;
    
    -- strpos for citext arguments: both are cast to text and lower-cased
    -- before being passed to the built-in pg_catalog.strpos.
    CREATE OR REPLACE FUNCTION strpos( citext, citext )
    RETURNS INT
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.strpos(
            pg_catalog.lower( CAST($1 AS pg_catalog.text) ),
            pg_catalog.lower( CAST($2 AS pg_catalog.text) )
        );
    $$;
    
    -- replace() for citext arguments, built on pg_catalog.regexp_replace with the 'gi' flags.
    -- The inner regexp_replace prefixes every character of $2 that is not a letter, digit or
    -- underscore with a backslash, presumably so that $2 is matched as a literal string rather
    -- than interpreted as a regular expression.
    -- NOTE(review): $3 is passed through unescaped as the replacement text — confirm that
    -- backslashes in $3 behave as callers expect.
    CREATE OR REPLACE FUNCTION replace( citext, citext, citext ) RETURNS TEXT AS $$
        SELECT pg_catalog.regexp_replace( $1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text, '([^a-zA-Z_0-9])', E'\\\\\\1', 'g'), $3::pg_catalog.text, 'gi' );
    $$ LANGUAGE SQL IMMUTABLE STRICT;
    
    -- split_part() for citext arguments: splits $1 with pg_catalog.regexp_split_to_array
    -- (flag 'i') and returns element $3 of the resulting array.
    -- The delimiter $2 first has every character that is not a letter, digit or underscore
    -- prefixed with a backslash, presumably so it is matched literally rather than as a regex.
    CREATE OR REPLACE FUNCTION split_part( citext, citext, int ) RETURNS TEXT AS $$
        SELECT (pg_catalog.regexp_split_to_array( $1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text, '([^a-zA-Z_0-9])', E'\\\\\\1', 'g'), 'i'))[$3];
    $$ LANGUAGE SQL IMMUTABLE STRICT;
    
    -- translate() for citext arguments: applies pg_catalog.translate twice,
    -- first with the lower-cased "from" set ($2) and then with the upper-cased
    -- one, using the same "to" set ($3) both times.
    CREATE OR REPLACE FUNCTION translate( citext, citext, text )
    RETURNS TEXT
    LANGUAGE SQL IMMUTABLE STRICT
    AS $$
        SELECT pg_catalog.translate(
            pg_catalog.translate(
                CAST($1 AS pg_catalog.text),
                pg_catalog.lower( CAST($2 AS pg_catalog.text) ),
                $3
            ),
            pg_catalog.upper( CAST($2 AS pg_catalog.text) ),
            $3
        );
    $$;
    
    0 讨论(0)
  • 2020-12-04 17:05

    First, what not to do: don't use ILIKE...

    create table y
    (
    id serial not null,
    email text not null unique
    );
    
    insert into y(email) 
    values('iSteve.jobs@apple.com') ,('linus.Torvalds@linUX.com');
    insert into y(email) 
    select n from generate_series(1,1000) as i(n);
    
    -- no need to create an index on email, 
    -- UNIQUE constraint on email already makes an index.
    -- thanks a_horse_with_no_name
    -- create index ix_y on y(email);
    
    explain select * from y 
    where email ilike 
        ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']);
    

    Execution Plan:

    memdb=# explain select * from y where email ilike ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']);
                                           QUERY PLAN                                       
    ----------------------------------------------------------------------------------------
     Seq Scan on y  (cost=0.00..17.52 rows=1 width=7)
       Filter: (email ~~* ANY ('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::text[]))
    (2 rows)
    

    Either you create an index on the lower() expression...

    -- Overload of lower() for text arrays: the array is cast to its text form,
    -- lower-cased as a single string, and the result is cast back to text[].
    CREATE FUNCTION lower(t text[]) RETURNS text[]
    LANGUAGE sql
    AS $$
    SELECT CAST(lower(CAST($1 AS text)) AS text[])
    $$;
    
    create unique index ix_y_2 on y(lower(email));
    
    explain select * from y 
    where lower(email) = 
        ANY(lower(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']));
    

    ...which properly uses the index:

    memdb=# explain select * from y where lower(email) = ANY(lower(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']));
                                                               QUERY PLAN                                                           
    --------------------------------------------------------------------------------------------------------------------------------
     Bitmap Heap Scan on y  (cost=22.60..27.98 rows=10 width=7)
       Recheck Cond: (lower(email) = ANY ((lower(('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::text[])::text))::text[]))
       ->  Bitmap Index Scan on ix_y_2  (cost=0.00..22.60 rows=10 width=0)
             Index Cond: (lower(email) = ANY ((lower(('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::text[])::text))::text[]))
    (4 rows)
    

    Or you use citext data type...

    create table x
    (
    id serial not null,
    email citext not null unique
    );
    
    insert into x(email) 
    values('iSteve.jobs@apple.com'),('linus.Torvalds@linUX.com');
    insert into x(email) 
    select n from generate_series(1,1000) as i(n);
    
    -- no need to create an index on email, 
    -- UNIQUE constraint on email already makes an index.
    -- thanks a_horse_with_no_name
    -- create index ix_x on x(email);
    
    explain select * from x 
    where email = 
    ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']::citext[]);
    

    ...which properly uses the index even if you don't create an index on an expression (e.g. create index zzz on yyy(lower(field))):

    memdb=# explain select * from x where email = ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']::citext[]);
                                                QUERY PLAN                                            
    --------------------------------------------------------------------------------------------------
    Bitmap Heap Scan on x  (cost=8.57..13.91 rows=2 width=36)
      Recheck Cond: (email = ANY ('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::citext[]))
      ->  Bitmap Index Scan on x_email_key  (cost=0.00..8.57 rows=2 width=0)
            Index Cond: (email = ANY ('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::citext[]))
    

    If citext field type is not yet installed, run this:

    CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;
    
    0 讨论(0)
  • 2020-12-04 17:09

    Things have changed in the last 4 years since this question was answered and the recommendation "don't use ILIKE" isn't true any more (at least in such a general way).

    In fact, depending on the data distribution, ILIKE with a trigram index might even be faster than citext.

    For a unique index there is indeed a big difference, which can be seen when using Michael's test setup:

    create table y
    (
      id serial not null,
      email text not null unique
    );
    
    insert into y(email) 
    select 'some.name'||n||'@foobar.com'
    from generate_series(1,100000) as i(n);
    
    -- create a trigram index to support ILIKE
    -- NOTE: the gin_trgm_ops operator class is provided by the pg_trgm extension,
    -- which has to be installed in the database first (CREATE EXTENSION pg_trgm).
    create index ix_y on y using gin (email gin_trgm_ops);
    
    create table x
    (
      id serial not null,
      email citext not null unique
    );
    -- no need to create an index
    -- the UNIQUE constraint will create a regular B-Tree index
    
    insert into x(email) 
    select email
    from y;
    

    The execution plan for using ILIKE:

    explain (analyze)
    select * 
    from y 
    where email ilike ANY (ARRAY['Some.Name420@foobar.com','Some.Name42@foobar.com']);
    
    Bitmap Heap Scan on y  (cost=126.07..154.50 rows=20 width=29) (actual time=60.696..60.818 rows=2 loops=1)
      Recheck Cond: (email ~~* ANY ('{Some.Name420@foobar.com,Some.Name42@foobar.com}'::text[]))
      Rows Removed by Index Recheck: 13
      Heap Blocks: exact=11
      ->  Bitmap Index Scan on ix_y  (cost=0.00..126.07 rows=20 width=0) (actual time=60.661..60.661 rows=15 loops=1)
            Index Cond: (email ~~* ANY ('{Some.Name420@foobar.com,Some.Name42@foobar.com}'::text[]))
    Planning time: 0.952 ms
    Execution time: 61.004 ms
    

    And for using citext:

    explain (analyze)
    select * 
    from x 
    where email = ANY (ARRAY['Some.Name420@foobar.com','Some.Name42@foobar.com']);
    
    Index Scan using x_email_key on x  (cost=0.42..5.85 rows=2 width=29) (actual time=0.111..0.203 rows=2 loops=1)
      Index Cond: (email = ANY ('{Some.Name420@foobar.com,Some.Name42@foobar.com}'::citext[]))
    Planning time: 0.115 ms
    Execution time: 0.254 ms
    

    Note that the ILIKE query is in fact something different than the = query for citext, as ILIKE would honor wildcards.


    However for a non-unique index things look different. The following setup is based on a recent question asking the same:

    create table data
    (
      group_id serial primary key,
      name text
    );
    
    create table data_ci
    (
      group_id serial primary key,
      name citext
    );
    
    -- Populate data with 1,000,000 rows: the two 1000-row series are cross-joined,
    -- so each of the 1000 names 'data1'..'data1000' is inserted 1000 times.
    insert into data(name)
    select 'data'||i.n
    from generate_series(1,1000) as i(n)
    cross join generate_series(1,1000) as i2(n);
    
    insert into data_ci(group_id, name)
    select group_id, name
    from data;
    
    create index ix_data_gin on data using gin (name public.gin_trgm_ops);
    create index ix_data_ci on data_ci (name);
    

    So we have a million rows in each table and 1000 distinct values for the name column, and for each distinct value we have 1000 duplicates. A query looking for 3 different values will thus return 3000 rows.

    In this case the trigram index is substantially faster than the BTree index:

    explain (analyze)
    select *
    from data 
    where name ilike any (array['Data1', 'data2', 'DATA3']);
    
    Bitmap Heap Scan on data  (cost=88.25..1777.61 rows=1535 width=11) (actual time=2.906..11.064 rows=3000 loops=1)
      Recheck Cond: (name ~~* ANY ('{Data1,data2,DATA3}'::text[]))
      Heap Blocks: exact=17
      ->  Bitmap Index Scan on ix_data_gin  (cost=0.00..87.87 rows=1535 width=0) (actual time=2.869..2.869 rows=3000 loops=1)
            Index Cond: (name ~~* ANY ('{Data1,data2,DATA3}'::text[]))
    Planning time: 2.174 ms
    Execution time: 11.282 ms
    

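    And the btree index on the citext column now uses a Seq Scan. Note that in the plan below the column is cast to text and compared against a text[] array, so the comparison is case-sensitive and only 1000 rows are returned instead of 3000; casting the array to citext[] (as in the earlier citext example) would be needed for a case-insensitive match: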

    explain analyze
    select *
    from data_ci
    where name = any (array['Data1', 'data2', 'DATA3']);
    
    Seq Scan on data_ci  (cost=0.00..10156.00 rows=2904 width=11) (actual time=0.449..304.301 rows=1000 loops=1)
      Filter: ((name)::text = ANY ('{Data1,data2,DATA3}'::text[]))
      Rows Removed by Filter: 999000
    Planning time: 0.152 ms
    Execution time: 304.360 ms
    

    Also the size of the GIN index is actually smaller than the one on the citext column:

    select pg_size_pretty(pg_total_relation_size('ix_data_gin')) as gin_index_size, 
           pg_size_pretty(pg_total_relation_size('ix_data_ci')) as citex_index_size
    
    gin_index_size | citex_index_size
    ---------------+-----------------
    11 MB          | 21 MB           
    

    The above was done using Postgres 9.6.1 on a Windows laptop with random_page_cost set to 1.5

    0 讨论(0)
  • 2020-12-04 17:10
    -- Case-insensitive match on email using ILIKE.
    -- The FROM clause was missing; users is the table from the question.
    select *
    from users
    where email ilike 'me@example.com'
    

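    ILIKE is similar to LIKE but case-insensitive. Because % and _ in the pattern act as wildcards, escape them (and the escape character itself) with replace() when the pattern comes from user input: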

    where email ilike replace(replace(replace($1, '~', '~~'), '%', '~%'), '_', '~_') escape '~'
    

    or you could create a function to escape text; for array of text use

    where email ilike any(array['adamB@a.com', 'eveA@b.com'])
    
    0 讨论(0)
提交回复
热议问题