IDRT - Integrated Data Repository Toolkit
Space shortcuts
Space Tools

Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

The following SQL Script can be exected at the end of an ETL (extraction, transformation, loading) pathway to calculate all value for the C_TOTALNUM column automatically. The script was developed on an Oracle 11g platform but should be easily portable to other relational databases.

Info
titleCoverage

Please note that the script will calculate the patient counts only over facts stored in the OBSERVATION_FACT table. If you installation contains fact data in other tables (e.g. PATIENT_DIMENSION) the script will need to be adapted to your individual configuration.

Code Block
languagesql
collapsetrue
-- SQL Script to calculate the column i2b2.c_totalnum

-- 1. Backup the i2b2 table
CREATE TABLE t_i2b2 AS (SELECT * FROM i2b2);

-- 2a. Calculate count of distinct patients for all branches and leaves of concept hierarchy (regular concepts
CREATE TABLE t_totalnum AS
SELECT i2b.c_tablename,
       i2b.c_fullname,
       COUNT(DISTINCT obs.patient_num) AS c_totalnum
  FROM observation_fact  obs
  JOIN concept_dimension con ON obs.concept_cd   =    con.concept_cd
  JOIN i2b2              i2b ON con.concept_path LIKE i2b.c_fullname || '%'
 GROUP BY i2b.c_tablename,i2b.c_fullname;

-- 2b. (Optionally) Calculate count of distinct patients for all branches and leaves of concept hierarchy (modifier concepts
INSERT INTO t_totalnum (c_tablename, c_fullname, c_totalnum)
SELECT i2b.c_tablename,
       i2b.c_fullname,
       COUNT(DISTINCT obs.patient_num) AS c_totalnum
  FROM observation_fact   obs
  JOIN modifier_dimension con ON obs.modifier_cd   =    con.modifier_cd
  JOIN i2b2               i2b ON con.modifier_path LIKE i2b.c_fullname || '%'
 GROUP BY i2b.c_tablename,i2b.c_fullname;

-- 3. Truncate i2b2 table
TRUNCATE TABLE i2b2;

-- 4. Repopulate the i2b2 table from its backup and the generated counts
INSERT INTO i2b2 (c_hlevel, c_fullname, c_name, c_synonym_cd,
c_visualattributes, c_totalnum, c_basecode, c_metadataxml,
c_facttablecolumn, c_tablename, c_columnname, c_columndatatype,
c_operator, c_dimcode, c_comment, c_tooltip, m_applied_path,
update_date, download_date, import_date, sourcesystem_cd, valuetype_cd,
m_exclusion_cd, c_path, c_symbol)
SELECT i2b.c_hlevel,
i2b.c_fullname, i2b.c_name, i2b.c_synonym_cd, i2b.c_visualattributes,
NVL(tnm.c_totalnum, 0), i2b.c_basecode, i2b.c_metadataxml,
i2b.c_facttablecolumn, i2b.c_tablename, i2b.c_columnname,
i2b.c_columndatatype, i2b.c_operator, i2b.c_dimcode, i2b.c_comment,
i2b.c_tooltip, i2b.m_applied_path, i2b.update_date, i2b.download_date,
i2b.import_date, i2b.sourcesystem_cd, i2b.valuetype_cd,
i2b.m_exclusion_cd, i2b.c_path, i2b.c_symbol
  FROM      t_i2b2     i2b
  LEFT JOIN t_totalnum tnm ON i2b.c_tablename = tnm.c_tablename AND i2b.c_fullname = tnm.c_fullname;

-- 5. Drop temporary tables
DROP TABLE t_totalnum;
DROP TABLE t_i2b2;

-- 6. Commit changes
COMMIT;

...

Code Block
languagesql
collapsetrue

DROP TABLE "OBSERVATION_FACT" cascade constraints;
CREATE TABLE "OBSERVATION_FACT"
    (           "ENCOUNTER_NUM" NUMBER(38,0) NOT NULL ENABLE,
                "PATIENT_NUM" NUMBER(38,0) NOT NULL ENABLE,
                "CONCEPT_CD" VARCHAR2(50 BYTE) NOT NULL ENABLE,
                "PROVIDER_ID" VARCHAR2(50 BYTE) NOT NULL ENABLE,
                "START_DATE" DATE NOT NULL ENABLE,
                "MODIFIER_CD" VARCHAR2(100 BYTE) NOT NULL ENABLE,
                "INSTANCE_NUM" NUMBER(18,0) NOT NULL ENABLE,
                "VALTYPE_CD" VARCHAR2(50 BYTE),
                "TVAL_CHAR" VARCHAR2(255 CHAR),
                "NVAL_NUM" NUMBER(18,5),
                "VALUEFLAG_CD" VARCHAR2(50 BYTE),
                "QUANTITY_NUM" NUMBER(18,5),
                "UNITS_CD" VARCHAR2(50 BYTE),
                "END_DATE" DATE,
                "LOCATION_CD" VARCHAR2(50 BYTE),
                "OBSERVATION_BLOB" CLOB,
                "CONFIDENCE_NUM" NUMBER(18,5),
                "UPDATE_DATE" DATE,
                "DOWNLOAD_DATE" DATE,
                "IMPORT_DATE" DATE,
                "SOURCESYSTEM_CD" VARCHAR2(50 BYTE),
                "UPLOAD_ID" NUMBER(38,0),
                CONSTRAINT "OBSERVATION_FACT_PK" PRIMARY KEY ("ENCOUNTER_NUM", "CONCEPT_CD", "PROVIDER_ID", "START_DATE", "MODIFIER_CD", "INSTANCE_NUM") USING INDEX LOCAL ENABLE
   )
LOB ("OBSERVATION_BLOB") STORE AS BASICFILE (ENABLE STORAGE IN ROW CHUNK 8192 RETENTION)
PARTITION BY RANGE ("CONCEPT_CD")
   (
   PARTITION BIO VALUES LESS THAN ('DEM'), -- "BIO" partition for biomaterials
   PARTITION DEM VALUES LESS THAN ('ENC'), -- "DEM" partition for demographics
   PARTITION ENC VALUES LESS THAN ('ICD'), -- "ENC" partition for encounter data
   PARTITION ICD VALUES LESS THAN ('LAB'), -- "ICD" partition for diagnoses
   PARTITION LAB VALUES LESS THAN ('OPS'), -- "LAB" partition for lab findings
   PARTITION OPS VALUES LESS THAN ('ORG'), -- "OPS" partition for procedures
   PARTITION ORG VALUES LESS THAN ('PAT'), -- "ORG" partition for organizational data
   PARTITION PAT VALUES LESS THAN ('RAD'), -- "PAT" partition for pathology findings
   PARTITION RAD VALUES LESS THAN ('THE'), -- "RAD" partition for radiology findings
   PARTITION THE VALUES LESS THAN ('ZZZ')  -- "THE" partition for therapy data
   );
CREATE INDEX of_ctx_blob                 ON observation_fact (observation_blob) indextype is ctxsys.context  parameters ('sync (on commit)');
CREATE INDEX fact_nolob                  ON observation_fact (patient_num, start_date, concept_cd, encounter_num, instance_num, nval_num, tval_char, valtype_cd, modifier_cd, valueflag_cd, provider_id, quantity_num, units_cd, end_date, location_cd, confidence_num, update_date, download_date, import_date, sourcesystem_cd, upload_id) LOCAL;
CREATE INDEX fact_patcon_date_prvd_idx   ON observation_fact (patient_num, concept_cd, start_date, end_date, encounter_num, instance_num, provider_id, nval_num, valtype_cd) LOCAL;
CREATE INDEX idrt_fact_cnpt_pat_enct_idx ON observation_fact (concept_cd, instance_num, patient_num, encounter_num) LOCAL;
CREATE INDEX idrt_fact_mdf_pat_enct_idx  ON observation_fact (modifier_cd, instance_num, patient_num, encounter_num) LOCAL;

...