forked from arnepeine/ventai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getWeight.sql
188 lines (181 loc) · 5.26 KB
/
getWeight.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
--Code retrieved from https://github.com/MIT-LCP/mimic-code/blob/master/concepts/durations/weight-durations.sql
DROP MATERIALIZED VIEW IF EXISTS getWeight2 ;
CREATE MATERIALIZED VIEW getWeight2 as
-- This query extracts weights for adult ICU patients with start/stop times
-- if an admission weight is given, then this is assigned from intime to outtime
with wt_stg as
(
SELECT
c.icustay_id
, c.charttime
, case when c.itemid in (762,226512) then 'admit'
else 'daily' end as weight_type
-- TODO: eliminate obvious outliers if there is a reasonable weight
, c.valuenum as weight
FROM mimiciii.chartevents c
WHERE c.valuenum IS NOT NULL
AND c.itemid in
(
762,226512 -- Admit Wt
,763,224639 -- Daily Weight
)
AND c.valuenum != 0
-- exclude rows marked as error
AND c.error IS DISTINCT FROM 1
)
-- assign ascending row number
, wt_stg1 as
(
select
icustay_id
, charttime
, weight_type
, weight
, ROW_NUMBER() OVER (partition by icustay_id, weight_type order by charttime) as rn
from wt_stg
)
-- change charttime to starttime - for admit weight, we use ICU admission time
, wt_stg2 as
(
select
wt_stg1.icustay_id
, ie.intime, ie.outtime
, case when wt_stg1.weight_type = 'admit' and wt_stg1.rn = 1
then ie.intime - interval '2' hour
else wt_stg1.charttime end as starttime
, wt_stg1.weight
from mimiciii.icustays ie
inner join wt_stg1
on ie.icustay_id = wt_stg1.icustay_id
where not (weight_type = 'admit' and rn = 1)
)
, wt_stg3 as
(
select
icustay_id
, starttime
, coalesce(
LEAD(starttime) OVER (PARTITION BY icustay_id ORDER BY starttime),
outtime + interval '2' hour
) as endtime
, weight
from wt_stg2
)
-- this table is the start/stop times from admit/daily weight in charted data
, wt1 as
(
select
ie.icustay_id
, wt.starttime
, case when wt.icustay_id is null then null
else
coalesce(wt.endtime,
LEAD(wt.starttime) OVER (partition by ie.icustay_id order by wt.starttime),
-- we add a 2 hour "fuzziness" window
ie.outtime + interval '2' hour)
end as endtime
, wt.weight
from mimiciii.icustays ie
left join wt_stg3 wt
on ie.icustay_id = wt.icustay_id
)
-- if the intime for the patient is < the first charted daily weight
-- then we will have a "gap" at the start of their stay
-- to prevent this, we look for these gaps and backfill the first weight
-- this adds (153255-149657)=3598 rows, meaning this fix helps for up to 3598 icustay_id
, wt_fix as
(
select ie.icustay_id
-- we add a 2 hour "fuzziness" window
, ie.intime - interval '2' hour as starttime
, wt.starttime as endtime
, wt.weight
from mimiciii.icustays ie
inner join
-- the below subquery returns one row for each unique icustay_id
-- the row contains: the first starttime and the corresponding weight
(
select wt1.icustay_id, wt1.starttime, wt1.weight
from wt1
inner join
(
select icustay_id, min(Starttime) as starttime
from wt1
group by icustay_id
) wt2
on wt1.icustay_id = wt2.icustay_id
and wt1.starttime = wt2.starttime
) wt
on ie.icustay_id = wt.icustay_id
and ie.intime < wt.starttime
)
, wt2 as
(
select
wt1.icustay_id
, wt1.starttime
, wt1.endtime
, wt1.weight
from wt1
UNION
SELECT
wt_fix.icustay_id
, wt_fix.starttime
, wt_fix.endtime
, wt_fix.weight
from wt_fix
)
-- get more weights from echo - completes data for ~2500 patients
-- we only use echo data if there is *no* charted data
-- we impute the median echo weight for their entire ICU stay
-- only ~762 patients remain with no weight data
, echo_lag as
(
select
ie.icustay_id
, ie.intime, ie.outtime
, 0.453592*ec.weight as weight_echo
, ROW_NUMBER() OVER (PARTITION BY ie.icustay_id ORDER BY ec.charttime) as rn
, ec.charttime as starttime
, LEAD(ec.charttime) OVER (PARTITION BY ie.icustay_id ORDER BY ec.charttime) as endtime
from mimiciii.icustays ie
inner join echodata ec
on ie.hadm_id = ec.hadm_id
where ec.weight is not null
)
, echo_final as
(
select
el.icustay_id
, el.starttime
-- we add a 2 hour "fuzziness" window
, coalesce(el.endtime,el.outtime + interval '2' hour) as endtime
, weight_echo
from echo_lag el
UNION
-- if the starttime was later than ICU admission, back-propogate the weight
select
el.icustay_id
, el.intime - interval '2' hour as starttime
, el.starttime as endtime
, el.weight_echo
from echo_lag el
where el.rn = 1
and el.starttime > el.intime - interval '2' hour
)
select
wt2.icustay_id,ic.subject_id,ic.hadm_id, avg(wt2.weight) as weight
from wt2
INNER JOIN mimiciii.icustays ic
ON wt2.icustay_id=ic.icustay_id
GROUP BY wt2.icustay_id,ic.subject_id,ic.hadm_id
UNION
-- only add echos if we have no charted weight data
select
ef.icustay_id, ic.subject_id,ic.hadm_id, avg(ef.weight_echo) as weight
from echo_final ef
INNER JOIN mimiciii.icustays ic
ON ef.icustay_id=ic.icustay_id
where ef.icustay_id not in (select distinct icustay_id from wt2)
group by ef.icustay_id,ic.subject_id,ic.hadm_id
order by icustay_id,subject_id,hadm_id