Uploaded image for project: 'i2b2 Core Software'
  1. i2b2 Core Software
  2. CORE-63

"Exclude" creates incorrect query.

    XMLWordPrintable

Details

    • Bug
    • Status: Closed
    • Major
    • Resolution: Fixed
    • 1.6.00
    • 1.6.03
    • CRC Cell
    • None
    • Rank:
      0|i000un:

    Description

      This appears to be similar to https://community.i2b2.org/jira/browse/LEOPARD-86.


      CORRECT SQL/Result:
      Panel 1: "Hispanic/Latino", "Black/African American"
      Panel 2: Frontiers Participant


      INCORRECT SQL/Result
      Panel 1: "Hispanic/Latino", "Black/African American"
      Panel 2: Frontiers Participant
      Panel 3: EXCLUDE ( "Deceased per KUMC records", "Deceased per SSA records")


      2nd Example of INCORRECT SQL/Result
      Panel 1: "Hispanic/Latino", "Black/African American"
      Panel 2: Frontiers Participant
      Panel 3: EXCLUDE "female"


      It apears that in the incorrect case, the generated SQL first ANDs the items in Panel 1 rather than ORs them.

      Please refer to the following link: http://informatics.kumc.edu/work/ticket/1075#comment:6 for screenshots, annotated SQL, XML traffic.

      Annotated SQL from QT_QUERY_MASTER:
      /*******************************************************************************
      Starting (eth:hispanic/latino OR race:black/african) AND HICTR (Frontiers)

      Here's what I have set:

      Panel 1:
      Eth: Hispanic/Latino
      Race: Black/African American

      Panel 2:
      HICTR Participant
      *******************************************************************************/

      --From i2b2 Query Master
      INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
        ( patient_num , panel_count
        )
      SELECT patient_num ,
        1 AS panel_count
      FROM
        (SELECT
          /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
          patient_num
        FROM BlueHerondata.observation_fact
        WHERE concept_cd IN
          (SELECT concept_cd
          FROM BlueHerondata.concept_dimension
          WHERE concept_path LIKE '\i2b2\Demographics\Ethnicity\Hispanic, Latino or Spanish Origin\%'
          )
        GROUP BY patient_num
        ) t ;

      --Correct 'or'
        
      INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
        ( patient_num , panel_count
        )
      SELECT patient_num ,
        1 AS panel_count
      FROM
        (SELECT
          /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
          patient_num
        FROM BlueHerondata.observation_fact
        WHERE concept_cd IN
          (SELECT concept_cd
          FROM BlueHerondata.concept_dimension
          WHERE concept_path LIKE '\i2b2\Demographics\Race\Black or African American\%'
          )
        GROUP BY patient_num
        ) t ;
        
      --Correct 'and'
        
      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =2
      WHERE EXISTS
        (SELECT 1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\HICTR Participant\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;
        
        
      INSERT INTO BlueHerondata.DX
        ( patient_num
        )
      SELECT *
      FROM
        ( SELECT DISTINCT patient_num
        FROM BlueHerondata.QUERY_GLOBAL_TEMP
        WHERE panel_count = 2
        ) q;
        
      /*******************************************************************************
      Add "exclude" of deceased:
      (eth:hispanic/latino OR race:black/african) AND HICTR (Frontiers) AND !(deceased OR deceased via SSA)

      Here's what I have set:

      Panel 1:
      Eth: Hispanic/Latino
      Race: Black/African American

      Panel 2:
      HICTR Participant

      Panel 3 (exclude):
      Deceased per KUMC
      Deceased per SSA
      *******************************************************************************/
      --Start with every distinct patient from patient_dimension
      INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
        ( patient_num , panel_count
        )
      SELECT DISTINCT patient_num ,1 FROM BlueHerondata.patient_dimension pat ;

      --Checking
      select panel_count, count(*) from BlueHerondata.query_global_temp
      group by panel_count; --1.9million are 1 now - everyone

      --Set everyone to -1 who doesn't come up as Hipanic/Latino
      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =-1
      WHERE NOT EXISTS
        (SELECT patient_num ,
          1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Ethnicity\Hispanic, Latino or Spanish Origin\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      select panel_count, count(*) from BlueHerondata.query_global_temp
      group by panel_count; -- -1=1871058, 1=31043

      /*
      The below should be an OR...but it's actually an AND as we're just selecting from
      what was left from last time. So, we only get those who were coded as both.
      */
        
      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =-1
      WHERE NOT EXISTS
        (SELECT patient_num ,
          1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Race\Black or African American\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      select panel_count, count(*) from BlueHerondata.query_global_temp
      group by panel_count; -- -1=1901888, 1=213 <--213 coded as both

      /*
      The damage has been done - now we select from those who are HICTR (Frontiers)
      and move them to the next step.
      */
        
      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =2
      WHERE EXISTS
        (SELECT 1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\HICTR Participant\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      /*
      Only 2 proceeded on to the next step.
      */
      select panel_count, count(*) from BlueHerondata.query_global_temp
      group by panel_count; -- 1=206, 2=7, -1=1901888


      /*
      Move everything to panel that is now panel 2
      */
      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count = 3
      WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 2 ;

      select panel_count, count(*) from BlueHerondata.query_global_temp
      group by panel_count; -- only 7 made it to 3

      /*
      Of those left, set everyone to -1 who is known to be deceased (KUMC)
      */

      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count = -1
      WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 3
      AND EXISTS
        (SELECT 1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Vital Status\Deceased\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      --No rows updated

      /*
      Of those left, set everyone to -1 who is known to be deceased (SSA)
      */
      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count = -1
      WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 3
      AND EXISTS
        (SELECT 1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Vital Status\Deceased per SSA\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;
        
      --0 rows updated

      INSERT INTO BlueHerondata.DX
        ( patient_num
        )
      SELECT *
      FROM
        ( SELECT DISTINCT patient_num
        FROM BlueHerondata.QUERY_GLOBAL_TEMP
        WHERE panel_count = 3
        ) q;
        
      /*******************************************************************************
      Now, exclude females - Same issue occurs
      (eth:hispanic/latino OR race:black/african) AND HICTR (Frontiers) AND !(females)

      Here's what I now have set:

      Panel 1:
      Eth: Hispanic/Latino
      Race: Black/African American

      Panel 2:
      HICTR Participant

      Panel 3 (exclude):
      females
      *******************************************************************************/
      -- Start with everybody
      INSERT INTO BlueHerondata.QUERY_GLOBAL_TEMP
        ( patient_num , panel_count
        )
      SELECT DISTINCT patient_num ,1 FROM BlueHerondata.patient_dimension pat ;

      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =-1
      WHERE NOT EXISTS
        (SELECT patient_num ,
          1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Ethnicity\Hispanic, Latino or Spanish Origin\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =-1
      WHERE NOT EXISTS
        (SELECT patient_num ,
          1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Race\Black or African American\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count =2
      WHERE EXISTS
        (SELECT 1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\HICTR Participant\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 1
        AND BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count = 3
      WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 2 ;

      UPDATE BlueHerondata.QUERY_GLOBAL_TEMP
      SET panel_count = -1
      WHERE BlueHerondata.QUERY_GLOBAL_TEMP.panel_count = 3
      AND EXISTS
        (SELECT 1 AS panel_count
        FROM
          (SELECT
            /*+ index(observation_fact fact_cnpt_pat_enct_idx) */
            patient_num
          FROM BlueHerondata.observation_fact
          WHERE concept_cd IN
            (SELECT concept_cd
            FROM BlueHerondata.concept_dimension
            WHERE concept_path LIKE '\i2b2\Demographics\Gender\Female\%'
            )
          GROUP BY patient_num
          ) t
        WHERE BlueHerondata.QUERY_GLOBAL_TEMP.patient_num = t.patient_num
        ) ;

      INSERT INTO BlueHerondata.DX
        ( patient_num
        )
      SELECT *
      FROM
        ( SELECT DISTINCT patient_num
        FROM BlueHerondata.QUERY_GLOBAL_TEMP
        WHERE panel_count = 3
        ) q;

      Attachments

        Activity

          People

            mem61 Mike Mendis
            ngraham Nathan Graham
            Votes:
            0 Vote for this issue
            Watchers:
            1 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: