Publications
Mao, Jialin; Goodney, Philip; Banerjee, Samprit; Kostic, Zoran; Smolderen, Kim; Mena-Hurtado, Carlos; Matheny, Michael E.
In: BMJ Surgery, Interventions, & Health Technologies, vol. 7, iss. 1, pp. e000387, 2025.
Abstract | Links | BibTeX | Tags: methodology, outcomes research, real-world data, vascular devices
@article{nokey,
  author     = {Mao, Jialin and Goodney, Philip and Banerjee, Samprit and Kostic, Zoran and Smolderen, Kim and Mena-Hurtado, Carlos and Matheny, Michael E.},
  title      = {Neural network models for predicting readmission among patients undergoing peripheral vascular intervention using electronic health record data and clinical registry data},
  journal    = {BMJ Surgery, Interventions, \& Health Technologies},
  volume     = {7},
  number     = {1},
  pages      = {e000387},
  year       = {2025},
  date       = {2025-06-26},
  doi        = {10.1136/bmjsit-2025-000387},
  abstract   = {Objectives: To determine whether neural network models based on electronic health record (EHR) data can match and augment the performance of models based on clinical registry data in predicting readmission after peripheral vascular intervention (PVI).
Design: Observational cohort study.
Setting: Vascular Quality Initiative registry and INSIGHT Clinical Research Network EHR data from multiple academic institutions in New York City.
Participants: Patients undergoing PVI during January 1, 2013 to September 30, 2021.
Main outcome measures: Our outcome variable was 90-day readmission. We developed logistic regression (LR), multilevel perceptron (MLP), and recurrent neural network (RNN) models using registry alone, EHR data alone, and combined registry-EHR data. EHR data were evaluated using derived variables to match registry variables (EHR-derived data) and clinically meaningful code aggregation (EHR-direct data). Models were evaluated using area under the curve (AUC) for discrimination, Spiegelhalter z score for calibration, and Brier score for overall performance.
Results: The analytical cohort included 2348 patients undergoing PVI (mean age: 69.9±11.5 years). 832 (35\%) patients were readmitted within 90 days. LR to predict 90-day readmission based on registry data alone had an AUC of 0.710, Spiegelhalter z score of 1.021, and Brier score of 0.211. MLP based on registry data alone had similar performance. MLP and RNN based on EHR-direct data (MLP: AUC=0.742, Spiegelhalter z=0.933, Brier=0.204; RNN: AUC=0.737, Spiegelhalter z=1.026, Brier=0.206) and registry+EHR-direct data (MLP: AUC=0.756, Spiegelhalter z=0.794, Brier=0.199; RNN: AUC=0.751, Spiegelhalter z=1.057, Brier=0.200) had improved performances. LR based on EHR-direct data and combined registry+EHR-direct data had worse performances.
Conclusions: EHR data, when used with neural network models, can be useful to establish readmission predictive models or augment clinical registry data. EHR-based models can be potentially embedded in the clinical workflow, but model performance may be constrained by the absence of certain information in clinical encounters, such as social determinants of health.},
  keywords   = {methodology, outcomes research, real-world data, vascular devices},
  pubstate   = {published},
  tppubtype  = {article}
}
Design: Observational cohort study.
Setting: Vascular Quality Initiative registry and INSIGHT Clinical Research Network EHR data from multiple academic institutions in New York City.
Participants: Patients undergoing PVI during January 1, 2013 to September 30, 2021.
Main outcome measures: Our outcome variable was 90-day readmission. We developed logistic regression (LR), multilevel perceptron (MLP), and recurrent neural network (RNN) models using registry alone, EHR data alone, and combined registry-EHR data. EHR data were evaluated using derived variables to match registry variables (EHR-derived data) and clinically meaningful code aggregation (EHR-direct data). Models were evaluated using area under the curve (AUC) for discrimination, Spiegelhalter z score for calibration, and Brier score for overall performance.
Results: The analytical cohort included 2348 patients undergoing PVI (mean age: 69.9±11.5 years). 832 (35%) patients were readmitted within 90 days. LR to predict 90-day readmission based on registry data alone had an AUC of 0.710, Spiegelhalter z score of 1.021, and Brier score of 0.211. MLP based on registry data alone had similar performance. MLP and RNN based on EHR-direct data (MLP: AUC=0.742, Spiegelhalter z=0.933, Brier=0.204; RNN: AUC=0.737, Spiegelhalter z=1.026, Brier=0.206) and registry+EHR-direct data (MLP: AUC=0.756, Spiegelhalter z=0.794, Brier=0.199; RNN: AUC=0.751, Spiegelhalter z=1.057, Brier=0.200) had improved performances. LR based on EHR-direct data and combined registry+EHR-direct data had worse performances.
Conclusions: EHR data, when used with neural network models, can be useful to establish readmission predictive models or augment clinical registry data. EHR-based models can be potentially embedded in the clinical workflow, but model performance may be constrained by the absence of certain information in clinical encounters, such as social determinants of health.
Marsolo, Keith; Kiernan, Daniel; Toh, Sengwee; Phua, Jasmin; Louzao, Darcy; Haynes, Kevin; Weiner, Mark G.; Angulo, Francisco; Bailey, Charles; Bian, Jiang; Fort, Daniel; Grannis, Shaun J.; Krishnamurthy, Ashok Kumar; Nair, Vinit; Rivera, Pedro; Silverstein, Jonathan; Zirkle, Maryan; Carton, Thomas W.
In: Journal of the American Medical Informatics Association, vol. 30, iss. 3, pp. 447-455, 2023.
Abstract | Links | BibTeX | Tags: distributed research networks, privacy-preserving record linkage, real-world data
@article{marsolo2023assessing,
  author     = {Marsolo, Keith and Kiernan, Daniel and Toh, Sengwee and Phua, Jasmin and Louzao, Darcy and Haynes, Kevin and Weiner, Mark G. and Angulo, Francisco and Bailey, Charles and Bian, Jiang and Fort, Daniel and Grannis, Shaun J. and Krishnamurthy, Ashok Kumar and Nair, Vinit and Rivera, Pedro and Silverstein, Jonathan and Zirkle, Maryan and Carton, Thomas W.},
  title      = {Assessing the impact of privacy-preserving record linkage on record overlap and patient demographic and clinical characteristics in {PCORnet}®, the {National Patient-Centered Clinical Research Network}},
  journal    = {Journal of the American Medical Informatics Association},
  volume     = {30},
  number     = {3},
  pages      = {447--455},
  year       = {2023},
  date       = {2023-02-16},
  urldate    = {2023-02-16},
  doi        = {10.1093/jamia/ocac229},
  abstract   = {Objective: This article describes the implementation of a privacy-preserving record linkage (PPRL) solution across PCORnet®, the National Patient-Centered Clinical Research Network.
Material and methods: Using a PPRL solution from Datavant, we quantified the degree of patient overlap across the network and report a de-duplicated analysis of the demographic and clinical characteristics of the PCORnet population.
Results: There were ∼170M patient records across the responding Network Partners, with ∼138M (81\%) of those corresponding to a unique patient. 82.1\% of patients were found in a single partner and 14.7\% were in 2. The percentage overlap between Partners ranged between 0\% and 80\% with a median of 0\%. Linking patients' electronic health records with claims increased disease prevalence in every clinical characteristic, ranging between 63\% and 173\%.
Discussion: The overlap between Partners was variable and depended on timeframe. However, patient data linkage changed the prevalence profile of the PCORnet patient population.
Conclusions: This project was one of the largest linkage efforts of its kind and demonstrates the potential value of record linkage. Linkage between Partners may be most useful in cases where there is geographic proximity between Partners, an expectation that potential linkage Partners will be able to fill gaps in data, or a longer study timeframe.},
  keywords   = {distributed research networks, privacy-preserving record linkage, real-world data},
  pubstate   = {published},
  tppubtype  = {article}
}
Material and methods: Using a PPRL solution from Datavant, we quantified the degree of patient overlap across the network and report a de-duplicated analysis of the demographic and clinical characteristics of the PCORnet population.
Results: There were ∼170M patient records across the responding Network Partners, with ∼138M (81%) of those corresponding to a unique patient. 82.1% of patients were found in a single partner and 14.7% were in 2. The percentage overlap between Partners ranged between 0% and 80% with a median of 0%. Linking patients' electronic health records with claims increased disease prevalence in every clinical characteristic, ranging between 63% and 173%.
Discussion: The overlap between Partners was variable and depended on timeframe. However, patient data linkage changed the prevalence profile of the PCORnet patient population.
Conclusions: This project was one of the largest linkage efforts of its kind and demonstrates the potential value of record linkage. Linkage between Partners may be most useful in cases where there is geographic proximity between Partners, an expectation that potential linkage Partners will be able to fill gaps in data, or a longer study timeframe.
