{
    "dmp": {
        "title": "Predicting Road Accident Severity in Great Britain",
        "description": "Data Management Plan for a machine learning experiment that predicts the severity of road traffic collisions in Great Britain using the UK Department for Transport STATS19 open dataset (2020-2024). The experiment trains a Gradient Boosting classifier and evaluates it on held-out test data, producing trained model artefacts and evaluation outputs.",
        "language": "eng",
        "created": "2026-05-29",
        "modified": "2026-05-29",
        "dmp_id": {
            "identifier": "https://doi.org/10.70124/545b4-t1166",
            "type": "doi"
        },
        "contact": {
            "name": "El Dib, Yehea",
            "mbox": "e12450748@student.tuwien.ac.at",
            "contact_id": {
                "identifier": "https://orcid.org/0009-0003-8506-0271",
                "type": "orcid"
            }
        },
        "contributor": [
            {
                "name": "El Dib, Yehea",
                "mbox": "e12450748@student.tuwien.ac.at",
                "role": [
                    "data_manager",
                    "project_leader"
                ],
                "contributor_id": {
                    "identifier": "https://orcid.org/0009-0003-8506-0271",
                    "type": "orcid"
                }
            },
            {
                "name": "Charles, Logan",
                "mbox": "e12550259@student.tuwien.ac.at",
                "role": [
                    "researcher"
                ],
                "contributor_id": {
                    "identifier": "https://orcid.org/0009-0002-3977-1286",
                    "type": "orcid"
                }
            },
            {
                "name": "Hardt, Julian",
                "mbox": "e12330562@student.tuwien.ac.at",
                "role": [
                    "researcher"
                ],
                "contributor_id": {
                    "identifier": "https://orcid.org/0009-0003-0171-5796",
                    "type": "orcid"
                }
            },
            {
                "name": "H\u00f6finger, Balthasar",
                "mbox": "e11908607@student.tuwien.ac.at",
                "role": [
                    "researcher"
                ],
                "contributor_id": {
                    "identifier": "https://orcid.org/0009-0000-2002-4200",
                    "type": "orcid"
                }
            }
        ],
        "cost": [],
        "project": [
            {
                "title": "Predicting Road Accident Severity in Great Britain",
                "description": "Student project for the FAIR Data Science / Data Stewardship 2026SS course at TU Wien. The project develops a fully documented, FAIR-compliant open-science machine learning experiment predicting road collision severity using UK Government open data. Zenodo DOI: https://doi.org/10.5281/zenodo.20416076",
                "start": "2026-02-01",
                "end": "2026-06-30",
                "funding": [
                    {
                        "funder_id": {
                            "identifier": "https://ror.org/04d836q62",
                            "type": "ror"
                        },
                        "funding_status": "planned",
                        "grant_id": {
                            "identifier": "N/A - student course project",
                            "type": "other"
                        }
                    }
                ]
            }
        ],
        "ethical_issues_exist": "no",
        "ethical_issues_description": "The dataset consists entirely of non-personal, collision-level road safety records published as open data by an official government body (UK Department for Transport). No personal data are processed, so GDPR does not apply. No ethical review is required.",
        "dataset": [
            {
                "dataset_id": {
                    "identifier": "https://www.gov.uk/government/statistical-data-sets/road-safety-open-data",
                    "type": "url"
                },
                "title": "STATS19 Road Safety Open Dataset (2020-2024)",
                "description": "Police-reported road traffic collision records from Great Britain covering 2020-2024, collected via the STATS19 reporting system and published by the UK Department for Transport. Approximately 503,000 rows with 44 attributes per collision including severity, location, road conditions, weather, and vehicle/casualty counts. Downloaded once from the UK Government open data portal and imported into TU Wien DBRepo.",
                "type": "dataset",
                "format": [
                    "text/csv"
                ],
                "keyword": [
                    "road safety",
                    "collision severity",
                    "STATS19",
                    "UK traffic data",
                    "open government data"
                ],
                "language": "eng",
                "issued": "2019-01-01",
                "personal_data": "no",
                "sensitive_data": "no",
                "is_reused": "reused",
                "data_quality_assurance": [
                    "Row counts and class distributions verified at every pipeline step.",
                    "DBRepo view row count verified against X-Count response header before processing.",
                    "Fixed random_state=42 used throughout for reproducibility.",
                    "SMOTE applied to training set only to handle class imbalance."
                ],
                "distribution": [
                    {
                        "title": "STATS19 Road Safety Open Dataset \u2014 UK Government open data portal",
                        "access_url": "https://www.gov.uk/government/statistical-data-sets/road-safety-open-data",
                        "available_until": "2036-06-30",
                        "byte_size": 524288000,
                        "data_access": "open",
                        "format": [
                            "text/csv"
                        ],
                        "license": [
                            {
                                "license_ref": "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/",
                                "start_date": "2019-01-01"
                            }
                        ]
                    },
                    {
                        "title": "STATS19 Road Safety Open Dataset \u2014 TU Wien DBRepo",
                        "access_url": "https://test.dbrepo.tuwien.ac.at/database/82c19b39-246c-4409-b25c-8baf3a158a70",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "format": [
                            "application/json"
                        ],
                        "license": [
                            {
                                "license_ref": "https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/",
                                "start_date": "2026-05-11"
                            }
                        ],
                        "host": {
                            "title": "TU Wien DBRepo (test instance)",
                            "url": "https://test.dbrepo.tuwien.ac.at",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "storage_type": "relational database"
                        }
                    }
                ],
                "security_and_privacy": [
                    {
                        "title": "No special measures required",
                        "description": "The dataset contains no personal or sensitive data. Standard open-access permissions apply."
                    }
                ]
            },
            {
                "dataset_id": {
                    "identifier": "https://doi.org/10.70124/s4hn9-sqv24",
                    "type": "doi"
                },
                "title": "UK Collision Severity Prediction \u2014 Processed Data Splits",
                "description": "Train (70%), validation (15%), and test (15%) CSV splits produced from the STATS19 input data by 01_load_data.py and 02_preprocess.py. Split is stratified by collision_severity with fixed random_state=42. train_resampled.csv includes SMOTE oversampling of minority classes. Each file contains 15 ML features plus the collision_severity label.",
                "type": "dataset",
                "format": [
                    "text/csv"
                ],
                "keyword": [
                    "road safety",
                    "collision severity",
                    "machine learning",
                    "train test split"
                ],
                "language": "eng",
                "personal_data": "no",
                "sensitive_data": "no",
                "is_reused": "produced",
                "data_quality_assurance": [
                    "Stratified split ensures class distribution is preserved across all three sets.",
                    "Row counts printed after each split step to catch silent data loss."
                ],
                "distribution": [
                    {
                        "title": "Processed data splits \u2014 TU Wien Research Data Repository",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "format": [
                            "text/csv"
                        ],
                        "license": [
                            {
                                "license_ref": "https://creativecommons.org/licenses/by/4.0/",
                                "start_date": "2026-05-29"
                            }
                        ],
                        "host": {
                            "title": "TU Wien Research Data Repository (test instance)",
                            "url": "https://test.researchdata.tuwien.ac.at",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "certified_with": "coretrustseal",
                            "storage_type": "institutional repository"
                        },
                        "access_url": "https://doi.org/10.70124/s4hn9-sqv24"
                    },
                    {
                        "title": "GitHub repository \u2014 Zenodo DOI",
                        "access_url": "https://doi.org/10.5281/zenodo.20416076",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "license": [
                            {
                                "license_ref": "https://spdx.org/licenses/MIT.html",
                                "start_date": "2026-05-29"
                            }
                        ],
                        "host": {
                            "title": "Zenodo",
                            "url": "https://zenodo.org",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "storage_type": "open repository"
                        }
                    }
                ]
            },
            {
                "dataset_id": {
                    "identifier": "https://doi.org/10.70124/cghhd-yb573",
                    "type": "doi"
                },
                "title": "UK Collision Severity Prediction \u2014 Trained Gradient Boosting Model",
                "description": "Serialised scikit-learn GradientBoostingClassifier trained on the SMOTE-resampled training set to predict collision severity (Fatal / Serious / Slight). Selected from three candidates (Decision Tree, Random Forest, Gradient Boosting) by Macro F1 on the validation set. Hyperparameters: n_estimators=150, max_depth=6, learning_rate=0.1, random_state=42. Test set performance: accuracy 0.676, macro F1 0.403.",
                "type": "model",
                "format": [
                    "application/octet-stream"
                ],
                "keyword": [
                    "road safety",
                    "collision severity",
                    "gradient boosting",
                    "scikit-learn",
                    "machine learning model"
                ],
                "language": "eng",
                "personal_data": "no",
                "sensitive_data": "no",
                "is_reused": "produced",
                "data_quality_assurance": [
                    "Model selected by Macro F1 score on held-out validation set.",
                    "Test set used only once for final evaluation.",
                    "FAIR4ML metadata documents all hyperparameters and evaluation metrics."
                ],
                "distribution": [
                    {
                        "title": "Trained model \u2014 TU Wien Research Data Repository",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "format": [
                            "application/octet-stream"
                        ],
                        "license": [
                            {
                                "license_ref": "https://creativecommons.org/licenses/by/4.0/",
                                "start_date": "2026-05-29"
                            }
                        ],
                        "host": {
                            "title": "TU Wien Research Data Repository (test instance)",
                            "url": "https://test.researchdata.tuwien.ac.at",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "certified_with": "coretrustseal",
                            "storage_type": "institutional repository"
                        },
                        "access_url": "https://doi.org/10.70124/cghhd-yb573"
                    },
                    {
                        "title": "GitHub repository \u2014 Zenodo DOI",
                        "access_url": "https://doi.org/10.5281/zenodo.20416076",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "license": [
                            {
                                "license_ref": "https://spdx.org/licenses/MIT.html",
                                "start_date": "2026-05-29"
                            }
                        ],
                        "host": {
                            "title": "Zenodo",
                            "url": "https://zenodo.org",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "storage_type": "open repository"
                        }
                    }
                ]
            },
            {
                "dataset_id": {
                    "identifier": "https://doi.org/10.70124/s4hn9-sqv24",
                    "type": "doi"
                },
                "title": "UK Collision Severity Prediction \u2014 Model Evaluation Outputs",
                "description": "Evaluation figures and predictions produced by 03_train_classifier.py and 04_evaluate.py: test_predictions_2026-05-25.csv (predicted vs. actual labels for 17,220 test samples), 01_data_understanding.png, 02_class_imbalance.png, 03_confusion_matrix.png, 04_performance_comparison.png, 05_feature_importance.png.",
                "type": "dataset",
                "format": [
                    "text/csv",
                    "image/png"
                ],
                "keyword": [
                    "road safety",
                    "collision severity",
                    "confusion matrix",
                    "feature importance",
                    "evaluation"
                ],
                "language": "eng",
                "personal_data": "no",
                "sensitive_data": "no",
                "is_reused": "produced",
                "distribution": [
                    {
                        "title": "Model evaluation outputs \u2014 TU Wien Research Data Repository",
                        "access_url": "https://doi.org/10.70124/s4hn9-sqv24",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "format": [
                            "text/csv",
                            "image/png"
                        ],
                        "license": [
                            {
                                "license_ref": "https://creativecommons.org/licenses/by/4.0/",
                                "start_date": "2026-05-29"
                            }
                        ],
                        "host": {
                            "title": "TU Wien Research Data Repository (test instance)",
                            "url": "https://test.researchdata.tuwien.ac.at",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "certified_with": "coretrustseal",
                            "storage_type": "institutional repository"
                        }
                    },
                    {
                        "title": "Model evaluation outputs \u2014 GitHub",
                        "access_url": "https://github.com/b4lz2/uk-collision-severity-prediction",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "format": [
                            "image/png"
                        ],
                        "license": [
                            {
                                "license_ref": "https://spdx.org/licenses/MIT.html",
                                "start_date": "2026-05-29"
                            }
                        ]
                    },
                    {
                        "title": "GitHub repository \u2014 Zenodo DOI",
                        "access_url": "https://doi.org/10.5281/zenodo.20416076",
                        "available_until": "2036-06-30",
                        "data_access": "open",
                        "license": [
                            {
                                "license_ref": "https://spdx.org/licenses/MIT.html",
                                "start_date": "2026-05-29"
                            }
                        ],
                        "host": {
                            "title": "Zenodo",
                            "url": "https://zenodo.org",
                            "supports_versioning": "yes",
                            "pid_system": [
                                "doi"
                            ],
                            "storage_type": "open repository"
                        }
                    }
                ]
            }
        ]
    }
}