@comment{
  JMAI11322: journal article, Journal of Medical Artificial Intelligence, vol. 9 (2026).
  Maintenance notes:
  - No issue number is recorded; a placeholder value of 0 was dropped so styles do not print 9(0).
  - No keywords are recorded; the empty keywords field was dropped to avoid empty-field warnings.
  - Author names use the Last, First form with the same name split BibTeX derived from the
    original First Last form (final word taken as surname) -- TODO confirm compound surnames.
  - Percent signs in the abstract are escaped so the field is safe if it is ever typeset.
}
@article{JMAI11322,
  author   = {Saha, Prottoy and Rakib, Mohammad and Ubaid, Fardin Abu and Kousik, Muhtasib Ibtida and Tasnim, Syeda Anika and Mahmud, Rifath and Ahmed, Tanvir},
  title    = {A data driven analysis of maternal health risk indicators using machine learning techniques},
  journal  = {Journal of Medical Artificial Intelligence},
  volume   = {9},
  year     = {2026},
  issn     = {2617-2496},
  url      = {https://jmai.amegroups.org/article/view/11322},
  abstract = {Background: Limited clinical resources and subjective risk assessment processes sometimes make it difficult to identify high-risk pregnancies early in the low and middle income countries, where maternal mortality is still a major global health concern. Using health indicators during routine antenatal checkups, machine learning (ML) presents a potential method for objective, data-driven risk classification. Feature importance analysis identified systolic blood pressure, age, and blood sugar (BS) as the most significant predictors of maternal health risk. Therefore, this study aims to develop and evaluate machine learning-based classification models for early and objective identification of high-risk pregnancies using routine antenatal health indicators in low and middle income country settings, and to compare the predictive performance of multiple supervised machine learning algorithms in maternal health risk stratification and establish a framework to support clinical decision-making in resource-limited obstetric care. Methods: An organized dataset comprising 1,017 patient records with 12 clinical and demographic characteristics, such as maternal age, blood pressure, BS, body temperature, heart rate, hemoglobin, and a number of categorical factors such as previous complications, was used in this investigation. Feature selection was carried out using recursive elimination approaches following preprocessing, which included median imputation, outlier treatment, label encoding, and z-score normalization. Five ML algorithms are: Logistic Regression, Decision Tree (DT), Random Forest, Support Vector Machine (SVM) and an Ensemble Voting Classifier were created and assessed. Among them 70\% of the data from the dataset was used to train the models and 20\% was used for tuning, and the remaining 10\% was used for testing. This article focuses on accuracy, precision, recall, F1-score, confusion matrices, and receiver operating characteristic (ROC)-area under the curve (AUC) as the performance indicators. Results: According to experimental results, the Random Forest method used 10-fold cross-validation to produce a superior mean accuracy of 99\% in validation and 100\% accuracy in test. Additionally, a framework for quantifying uncertainty was put into place, identifying 1.27\% of forecasts that needed manual clinical assessment in order to guarantee patient safety. Conclusions: Early risk identification may be greatly improved by using ML approaches into maternal healthcare analytics. Specifically, the Random Forest and Ensemble models provide scalable, interpretable, and therapeutically relevant solutions that may be used in the real world. The utility of ML-driven technologies as supplemental assistance in obstetric risk management techniques is supported by these findings.},
}