[CORE-63] "Exclude" creates incorrect query. Created: 27/Apr/12  Updated: 05/Jun/14  Resolved: 29/Jan/13

Status: Closed
Project: i2b2 Core Software
Component/s: CRC Cell
Affects Version/s: 1.6.00
Fix Version/s: 1.6.03

Type: Bug Priority: Major
Reporter: Nathan Graham Assignee: Mike Mendis
Resolution: Fixed Votes: 0
Labels: None
Remaining Estimate: Not Specified
Time Spent: Not Specified
Original Estimate: Not Specified

Participant/s:

 Description   
This appears to be similar to https://community.i2b2.org/jira/browse/LEOPARD-86.


CORRECT SQL/Result:
Panel 1: "Hispanic/Latino", "Black/African American"
Panel 2: Frontiers Participant


INCORRECT SQL/Result
Panel 1: "Hispanic/Latino", "Black/African American"
Panel 2: Frontiers Participant
Panel 3: EXCLUDE ( "Deceased per KUMC records", "Deceased per SSA records")


2nd Example of INCORRECT SQL/Result
Panel 1: "Hispanic/Latino", "Black/African American"
Panel 2: Frontiers Participant
Panel 3: EXCLUDE "female"


It apears that in the incorrect case, the generated SQL first ANDs the items in Panel 1 rather than ORs them.

Please refer to the following link: http://informatics.kumc.edu/work/ticket/1075#comment:6 for screenshots, annotated SQL, XML traffic.

Annotated SQL from QT_QUERY_MASTER:
/*******************************************************************************
Starting (eth:hispanic/latino OR race:black/african) AND HICTR (Frontiers)

Here's what I have set:

Panel 1:
Eth: Hispanic/Latino
Race: Black/African American

Panel 2:
HICTR Participant
*******************************************************************************/

--From i2b2 Query Master
INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
  ( patient_num , panel_count
  )
SELECT patient_num ,
  1 AS panel_count
FROM
  (SELECT
    /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
    patient_num
  FROM BlueHerondata.observation_fact
  WHERE concept_cd IN
    (SELECT concept_cd
    FROM BlueHerondata.concept_dimension
    WHERE concept_path LIKE '\i2b2\Demographics\Ethnicity\Hispanic, Latino or Spanish Origin\%'
    )
  GROUP BY patient_num
  ) t ;

--Correct 'or'
  
INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
  ( patient_num , panel_count
  )
SELECT patient_num ,
  1 AS panel_count
FROM
  (SELECT
    /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
    patient_num
  FROM BlueHerondata.observation_fact
  WHERE concept_cd IN
    (SELECT concept_cd
    FROM BlueHerondata.concept_dimension
    WHERE concept_path LIKE '\i2b2\Demographics\Race\Black or African American\%'
    )
  GROUP BY patient_num
  ) t ;
  
--Correct 'and'
  
UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =2
WHERE EXISTS
  (SELECT 1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\HICTR Participant\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;
  
  
INSERT INTO BlueHerondata.DX
  ( patient_num
  )
SELECT *
FROM
  ( SELECT DISTINCT patient_num
  FROM BlueHerondata.QUERY_GLOBAL_TEMP
  WHERE panel_count = 2
  ) q;
  
/*******************************************************************************
Add "exclude" of deceased:
(eth:hispanic/latino OR race:black/african) AND HICTR (Frontiers) AND !(deceased OR deceased via SSA)

Here's what I have set:

Panel 1:
Eth: Hispanic/Latino
Race: Black/African American

Panel 2:
HICTR Participant

Panel 3 (exclude):
Deceased per KUMC
Deceased per SSA
*******************************************************************************/
--Start with every distinct patient from patient_dimension
INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
  ( patient_num , panel_count
  )
SELECT DISTINCT patient_num ,1 FROM BlueHerondata.patient_dimension pat ;

--Checking
select panel_count, count(*) from BlueHerondata.query_global_temp
group by panel_count; --1.9million are 1 now - everyone

--Set everyone to -1 who doesn't come up as Hipanic/Latino
UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =-1
WHERE NOT EXISTS
  (SELECT patient_num ,
    1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Ethnicity\Hispanic, Latino or Spanish Origin\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

select panel_count, count(*) from BlueHerondata.query_global_temp
group by panel_count; -- -1=1871058, 1=31043

/*
The below should be an OR...but it's actually an AND as we're just selecting from
what was left from last time. So, we only get those who were coded as both.
*/
  
UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =-1
WHERE NOT EXISTS
  (SELECT patient_num ,
    1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Race\Black or African American\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

select panel_count, count(*) from BlueHerondata.query_global_temp
group by panel_count; -- -1=1901888, 1=213 <--213 coded as both

/*
The damage has been done - now we select from those who are HICTR (Frontiers)
and move them to the next step.
*/
  
UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =2
WHERE EXISTS
  (SELECT 1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\HICTR Participant\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

/*
Only 2 proceeded on to the next step.
*/
select panel_count, count(*) from BlueHerondata.query_global_temp
group by panel_count; -- 1=206, 2=7, -1=1901888


/*
Move everything to panel that is now panel 2
*/
UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count = 3
WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 2 ;

select panel_count, count(*) from BlueHerondata.query_global_temp
group by panel_count; -- only 7 made it to 3

/*
Of those left, set everyone to -1 who is known to be deceased (KUMC)
*/

UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count = -1
WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 3
AND EXISTS
  (SELECT 1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Vital Status\Deceased\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

--No rows updated

/*
Of those left, set everyone to -1 who is known to be deceased (SSA)
*/
UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count = -1
WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 3
AND EXISTS
  (SELECT 1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Vital Status\Deceased per SSA\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;
  
--0 rows updated

INSERT INTO BlueHerondata.DX
  ( patient_num
  )
SELECT *
FROM
  ( SELECT DISTINCT patient_num
  FROM BlueHerondata.QUERY_GLOBAL_TEMP
  WHERE panel_count = 3
  ) q;
  
/*******************************************************************************
Now, exclude females - Same issue occurs
(eth:hispanic/latino OR race:black/african) AND HICTR (Frontiers) AND !(females)

Here's what I now have set:

Panel 1:
Eth: Hispanic/Latino
Race: Black/African American

Panel 2:
HICTR Participant

Panel 3 (exclude):
females
*******************************************************************************/
-- Start with everybody
INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
  ( patient_num , panel_count
  )
SELECT DISTINCT patient_num ,1 FROM BlueHerondata.patient_dimension pat ;

UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =-1
WHERE NOT EXISTS
  (SELECT patient_num ,
    1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Ethnicity\Hispanic, Latino or Spanish Origin\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =-1
WHERE NOT EXISTS
  (SELECT patient_num ,
    1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Race\Black or African American\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count =2
WHERE EXISTS
  (SELECT 1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\HICTR Participant\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
  AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count = 3
WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 2 ;

UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
SET panel_count = -1
WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 3
AND EXISTS
  (SELECT 1 AS panel_count
  FROM
    (SELECT
      /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
      patient_num
    FROM BlueHerondata.observation_fact
    WHERE concept_cd IN
      (SELECT concept_cd
      FROM BlueHerondata.concept_dimension
      WHERE concept_path LIKE '\i2b2\Demographics\Gender\Female\%'
      )
    GROUP BY patient_num
    ) t
  WHERE BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
  ) ;

INSERT INTO BlueHerondata.DX
  ( patient_num
  )
SELECT *
FROM
  ( SELECT DISTINCT patient_num
  FROM BlueHerondata.QUERY_GLOBAL_TEMP
  WHERE panel_count = 3
  ) q;


 Comments   
Comment by Nathan Graham [ 30/Apr/12 ]
We are running version 1.6.02 - I had marked version 1.6 in the ticket.
Comment by Nathan Graham [ 15/May/12 ]
I verified that this issue is resolved in 1.6.03.
Comment by Janice Donahoe [ 29/Jan/13 ]
This was fixed and released as part of the 1.6.03 Release
Generated at Wed Apr 09 07:47:43 UTC 2025 using Jira 8.20.11#820011-sha1:0629dd8d260e3954ece49053e565d01dabe11609.