EXISTS is my new favorite tool

Mon Jul 13, 2009 by Peter Larsson in optimization, sql-server-2008, algorithms, sql-server-2005, sql-server-2000

Since this blog post http://weblogs.sqlteam.com/peterl/archive/2007/09/24/Finding-group-of-records-with-a-certain-status.aspx I have kept the technique in similar situations and perhaps never bothered to rethink my prerogative.

Well, today I have. Recently I blogged here http://weblogs.sqlteam.com/peterl/archive/2009/06/12/Timings-of-different-techniques-for-finding-missing-records.aspx about finding records in one table which are missing in another table. In that blog post, I noticed that NOT EXISTS were the better choice for all scenarios, and have learnt to adopt the NOT EXISTS approach.

Why haven't I extended that knowledge to the "Certain status" scenario? I can't tell. And that is a good thing.
Because being active on a good forum now and then makes you rethink your positions. As I did today.

Here is the test case from which I now have decided to use NOT EXISTS in the "Certain status" cases from now on.

CREATE TABLE    #Patients
                (
                    PatientID SMALLINT
                )

INSERT #Patients
        (
            PatientID
        )

SELECT Number
FROM    master..spt_values
WHERE   Type = 'P'

CREATE UNIQUE CLUSTERED INDEX IX_Patients ON #Patients(PatientID)

CREATE TABLE    #Medications
                (
                    PatientID SMALLINT,
                    MedicinID SMALLINT
                )

INSERT          #Medications
                (
                    PatientID,
                    MedicinID
                )
SELECT DISTINCT ABS(CHECKSUM(NEWID())) % 2048,
                ABS(CHECKSUM(NEWID())) % 32768
FROM            master..spt_values AS v1
INNER JOIN      master..spt_values AS v2 ON v2.Type = 'P'
WHERE           v1.Type = 'P'

CREATE UNIQUE CLUSTERED INDEX IX_Medications ON #Medications(PatientID, MedicinID)

CREATE NONCLUSTERED INDEX UX_Medications ON #Medications(MedicinID)
INCLUDE(PatientID)

CREATE NONCLUSTERED INDEX YX_Medications ON #Medications(PatientID)
INCLUDE(MedicinID)

-- NOT EXISTS, my new favorite
SELECT p.PatientID
FROM    #Patients AS p
WHERE   NOT EXISTS (
                        SELECT *
                        FROM    #Medications AS m
                        WHERE   m.PatientID = p.PatientID
                                AND m.MedicinID = 0
                    )

-- HAVING, my old favorite (Peso 1)
SELECT      p.PatientID
FROM        #Patients AS p
INNER JOIN (
                SELECT      PatientID
                FROM        #Medications
                GROUP BY    PatientID
                HAVING      MAX(CASE WHEN MedicinID = 0 THEN 1 ELSE 0 END) = 0
            ) AS x ON x.PatientID = p.PatientID

-- Peso 2
SELECT p.PatientID
FROM    #Patients AS p
WHERE   EXISTS (
                   SELECT 0

                   EXCEPT

                   SELECT m.MedicinID
                   FROM    #Medications AS m
                   WHERE   m.PatientID = p.PatientID
               )

-- Peso 3
SELECT      p.PatientID
FROM        #Patients AS p
LEFT JOIN   #Medications AS m ON m.PatientID = p.PatientID
                AND m.MedicinID = 0
WHERE       m.PatientID IS NULL

-- Peso 4
SELECT PatientID,
        0
FROM    #Patients

EXCEPT

SELECT PatientID,
        MedicinID
FROM    #Medications

-- Peso 5
SELECT p.PatientID
FROM    #Patients AS p
WHERE   p.PatientID NOT IN (
                               SELECT m.PatientID
                               FROM    #Medications AS m
                               WHERE   m.MedicinID = 0
                                       AND m.PatientID = p.PatientID
                           )

DROP TABLE #Patients,
            #Medications