{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "133f5019", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2025-05-22 16:13:34.152730: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2025-05-22 16:13:34.200579: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import json\n", "\n", "from utils.feature_engineering import *\n", "from utils.data_cleaning import *\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import HistGradientBoostingClassifier \n", "from sklearn.metrics import accuracy_score, classification_report, f1_score\n", "\n", "\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "from sentence_transformers import SentenceTransformer\n", "from geopy.geocoders import Nominatim\n", "from geopy.distance import geodesic\n", "import time \n", "import warnings \n", "\n", "\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "id": "4a470efe", "metadata": {}, "source": [ "### Load the data" ] }, { "cell_type": "code", "execution_count": 125, "id": "0306cd45", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | ID | \n", "Candidate State | \n", "Age Range | \n", "Residence | \n", "Sex | \n", "Protected category | \n", "TAG | \n", "Study area | \n", "Study Title | \n", "Years Experience | \n", "Sector | \n", "Last Role | \n", "Year of insertion | \n", "Year of Recruitment | \n", "Recruitment Request | \n", "Assumption Headquarters | \n", "Job Family Hiring | \n", "Job Title Hiring | \n", "event_type__val | \n", "event_feedback | \n", "linked_search__key | \n", "Overall | \n", "Job Description | \n", "Candidate Profile | \n", "Years Experience.1 | \n", "Minimum Ral | \n", "Ral Maximum | \n", "Study Level | \n", "Study Area.1 | \n", "Akkodis headquarters | \n", "Current Ral | \n", "Expected Ral | \n", "Technical Skills | \n", "Standing/Position | \n", "Comunication | \n", "Maturity | \n", "Dynamism | \n", "Mobility | \n", "English | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "71470 | \n", "Hired | \n", "31 - 35 years | \n", "TURIN » Turin ~ Piedmont | \n", "Male | \n", "NaN | \n", "AUTOSAR, CAN, C, C++, MATLAB/SIMULINK, VECTOR/... | \n", "Automation/Mechatronics Engineering | \n", "Five-year degree | \n", "[1-3] | \n", "Automotive | \n", "Diagnostic/Test engineer | \n", "[2018] | \n", "[2021] | \n", "E/E Diagnostic Integration Engineer - Automotive | \n", "Milan | \n", "Engineering | \n", "Consultant | \n", "Candidate notification | \n", "NaN | \n", "NaN | \n", "NaN | \n", "The candidate, inserted within a multidiscipli... | \n", "The ideal candidate has a degree in Electronic... | \n", "[1-3] | \n", "26-28K | \n", "30-32K | \n", "Five-year degree | \n", "electronic Engineering | \n", "Modena | \n", "22-24 K | \n", "24-26 K | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 1 | \n", "71470 | \n", "Hired | \n", "31 - 35 years | \n", "TURIN » Turin ~ Piedmont | \n", "Male | \n", "NaN | \n", "AUTOSAR, CAN, C, C++, MATLAB/SIMULINK, VECTOR/... | \n", "Automation/Mechatronics Engineering | \n", "Five-year degree | \n", "[1-3] | \n", "Automotive | \n", "Diagnostic/Test engineer | \n", "[2018] | \n", "[2021] | \n", "E/E Diagnostic Integration Engineer - Automotive | \n", "Milan | \n", "Engineering | \n", "Consultant | \n", "BM interview | \n", "NaN | \n", "RS18.0145 | \n", "NaN | \n", "The candidate, inserted within a multidiscipli... | \n", "The ideal candidate has a degree in Electronic... | \n", "[1-3] | \n", "26-28K | \n", "30-32K | \n", "Five-year degree | \n", "electronic Engineering | \n", "Modena | \n", "22-24 K | \n", "24-26 K | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 2 | \n", "71470 | \n", "Hired | \n", "31 - 35 years | \n", "TURIN » Turin ~ Piedmont | \n", "Male | \n", "NaN | \n", "AUTOSAR, CAN, C, C++, MATLAB/SIMULINK, VECTOR/... | \n", "Automation/Mechatronics Engineering | \n", "Five-year degree | \n", "[1-3] | \n", "Automotive | \n", "Diagnostic/Test engineer | \n", "[2018] | \n", "[2021] | \n", "E/E Diagnostic Integration Engineer - Automotive | \n", "Milan | \n", "Engineering | \n", "Consultant | \n", "Contact note | \n", "NaN | \n", "NaN | \n", "NaN | \n", "The candidate, inserted within a multidiscipli... | \n", "The ideal candidate has a degree in Electronic... | \n", "[1-3] | \n", "26-28K | \n", "30-32K | \n", "Five-year degree | \n", "electronic Engineering | \n", "Modena | \n", "22-24 K | \n", "24-26 K | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 3 | \n", "71470 | \n", "Hired | \n", "31 - 35 years | \n", "TURIN » Turin ~ Piedmont | \n", "Male | \n", "NaN | \n", "AUTOSAR, CAN, C, C++, MATLAB/SIMULINK, VECTOR/... | \n", "Automation/Mechatronics Engineering | \n", "Five-year degree | \n", "[1-3] | \n", "Automotive | \n", "Diagnostic/Test engineer | \n", "[2018] | \n", "[2021] | \n", "E/E Diagnostic Integration Engineer - Automotive | \n", "Milan | \n", "Engineering | \n", "Consultant | \n", "BM interview | \n", "OK | \n", "RS18.0114 | \n", "~ 2 - Medium | \n", "The candidate, inserted within a multidiscipli... | \n", "The ideal candidate has a degree in Electronic... | \n", "[1-3] | \n", "26-28K | \n", "30-32K | \n", "Five-year degree | \n", "electronic Engineering | \n", "Modena | \n", "22-24 K | \n", "24-26 K | \n", "2.0 | \n", "2.0 | \n", "1.0 | \n", "2.0 | \n", "2.0 | \n", "3.0 | \n", "3.0 | \n", "
| 4 | \n", "71470 | \n", "Hired | \n", "31 - 35 years | \n", "TURIN » Turin ~ Piedmont | \n", "Male | \n", "NaN | \n", "AUTOSAR, CAN, C, C++, MATLAB/SIMULINK, VECTOR/... | \n", "Automation/Mechatronics Engineering | \n", "Five-year degree | \n", "[1-3] | \n", "Automotive | \n", "Diagnostic/Test engineer | \n", "[2018] | \n", "[2021] | \n", "E/E Diagnostic Integration Engineer - Automotive | \n", "Milan | \n", "Engineering | \n", "Consultant | \n", "Commercial note | \n", "NaN | \n", "NaN | \n", "NaN | \n", "The candidate, inserted within a multidiscipli... | \n", "The ideal candidate has a degree in Electronic... | \n", "[1-3] | \n", "26-28K | \n", "30-32K | \n", "Five-year degree | \n", "electronic Engineering | \n", "Modena | \n", "22-24 K | \n", "24-26 K | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 21372 | \n", "79993 | \n", "Hired | \n", "26 - 30 years | \n", "TORRE ANNUNZIATA » Naples ~ Campania | \n", "Male | \n", "NaN | \n", "X | \n", "chemical engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "Graduating student | \n", "[2023] | \n", "[2023] | \n", "Junior Project Engineer (C&Q) | \n", "Pomezia | \n", "Tech Consulting & Solutions | \n", "Consultant | \n", "HR interview | \n", "OK | \n", "RS23.0793 | \n", "~ 3 - High | \n", "The resource, included in a team dedicated to ... | \n", "The ideal candidate has a Master's Degree in C... | \n", "[0] | \n", "- 20K | \n", "- 20K | \n", "Five-year degree | \n", "chemical engineering | \n", "Pomezia | \n", "Not available | \n", "Not available | \n", "2.0 | \n", "2.0 | \n", "3.0 | \n", "3.0 | \n", "3.0 | \n", "3.0 | \n", "3.0 | \n", "
| 21373 | \n", "79993 | \n", "Hired | \n", "26 - 30 years | \n", "TORRE ANNUNZIATA » Naples ~ Campania | \n", "Male | \n", "NaN | \n", "X | \n", "chemical engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "Graduating student | \n", "[2023] | \n", "[2023] | \n", "Junior Project Engineer (C&Q) | \n", "Pomezia | \n", "Tech Consulting & Solutions | \n", "Consultant | \n", "Candidate notification | \n", "NaN | \n", "NaN | \n", "NaN | \n", "The resource, included in a team dedicated to ... | \n", "The ideal candidate has a Master's Degree in C... | \n", "[0] | \n", "- 20K | \n", "- 20K | \n", "Five-year degree | \n", "chemical engineering | \n", "Pomezia | \n", "Not available | \n", "Not available | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 21374 | \n", "79993 | \n", "Hired | \n", "26 - 30 years | \n", "TORRE ANNUNZIATA » Naples ~ Campania | \n", "Male | \n", "NaN | \n", "X | \n", "chemical engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "Graduating student | \n", "[2023] | \n", "[2023] | \n", "Junior Project Engineer (C&Q) | \n", "Pomezia | \n", "Tech Consulting & Solutions | \n", "Consultant | \n", "Candidate notification | \n", "NaN | \n", "NaN | \n", "NaN | \n", "The resource, included in a team dedicated to ... | \n", "The ideal candidate has a Master's Degree in C... | \n", "[0] | \n", "- 20K | \n", "- 20K | \n", "Five-year degree | \n", "chemical engineering | \n", "Pomezia | \n", "Not available | \n", "Not available | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 21375 | \n", "79993 | \n", "Hired | \n", "26 - 30 years | \n", "TORRE ANNUNZIATA » Naples ~ Campania | \n", "Male | \n", "NaN | \n", "X | \n", "chemical engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "Graduating student | \n", "[2023] | \n", "[2023] | \n", "Junior Project Engineer (C&Q) | \n", "Pomezia | \n", "Tech Consulting & Solutions | \n", "Consultant | \n", "Technical interview | \n", "OK | \n", "RS23.0793 | \n", "~ 2 - Medium | \n", "The resource, included in a team dedicated to ... | \n", "The ideal candidate has a Master's Degree in C... | \n", "[0] | \n", "20K | \n", "- 20K | \n", "Five-year degree | \n", "chemical engineering | \n", "Pomezia | \n", "Not available | \n", "Not available | \n", "2.0 | \n", "2.0 | \n", "2.0 | \n", "2.0 | \n", "2.0 | \n", "3.0 | \n", "3.0 | \n", "
| 21376 | \n", "79993 | \n", "Hired | \n", "26 - 30 years | \n", "TORRE ANNUNZIATA » Naples ~ Campania | \n", "Male | \n", "NaN | \n", "X | \n", "chemical engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "Graduating student | \n", "[2023] | \n", "[2023] | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Not available | \n", "Not available | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
21377 rows × 39 columns
\n", "| \n", " | Sex | \n", "Age Range | \n", "Protected category | \n", "Italian Residence | \n", "European Residence | \n", "TAG | \n", "Study area | \n", "Study Title | \n", "Years Experience | \n", "Sector | \n", "... | \n", "Minimum Ral | \n", "Ral Maximum | \n", "Study Level | \n", "Study Area.1 | \n", "Akkodis HQ Lat | \n", "Akkodis HQ Lng | \n", "Assumption HQ Lat | \n", "Assumption HQ Lng | \n", "number_of_searches | \n", "Hired | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Female | \n", "26 - 30 years | \n", "False | \n", "True | \n", "True | \n", "-, 3D PRINTING PREFORM SOFTWARE; PYTHON; ANSYS... | \n", "Biomedical Engineering | \n", "Five-year degree | \n", "[0] | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1258.0 | \n", "0 | \n", "
| 1 | \n", "Female | \n", "26 - 30 years | \n", "False | \n", "True | \n", "True | \n", "-, 3D PRINTING PREFORM SOFTWARE; PYTHON; ANSYS... | \n", "Biomedical Engineering | \n", "Five-year degree | \n", "[0] | \n", "NaN | \n", "... | \n", "19200.0 | \n", "19000.0 | \n", "Five-year degree | \n", "Chemist - Pharmaceutical | \n", "43.4667 | \n", "11.1500 | \n", "43.4667 | \n", "11.1500 | \n", "270.0 | \n", "0 | \n", "
| 2 | \n", "Female | \n", "< 20 years | \n", "False | \n", "True | \n", "True | \n", "PROJECT MANAGEMENT | \n", "Management Engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1188.0 | \n", "0 | \n", "
| 3 | \n", "Female | \n", "< 20 years | \n", "False | \n", "True | \n", "True | \n", "PROJECT MANAGEMENT | \n", "Management Engineering | \n", "Five-year degree | \n", "[0] | \n", "Others | \n", "... | \n", "NaN | \n", "NaN | \n", "Five-year degree | \n", "electronic Engineering | \n", "44.6458 | \n", "10.9257 | \n", "44.6458 | \n", "10.9257 | \n", "696.0 | \n", "0 | \n", "
| 4 | \n", "Male | \n", "26 - 30 years | \n", "False | \n", "True | \n", "True | \n", "ANGULAR, JAVASCRIPT. | \n", "Informatics | \n", "Three-year degree | \n", "[1-3] | \n", "Telecom | \n", "... | \n", "23000.0 | \n", "29000.0 | \n", "Three-year degree | \n", "Informatics | \n", "45.4669 | \n", "9.1900 | \n", "44.4939 | \n", "11.3428 | \n", "337.0 | \n", "1 | \n", "
5 rows × 39 columns
\n", "| \n", " | Feature Set | \n", "Model | \n", "Train F1 | \n", "Test F1 | \n", "
|---|---|---|---|---|
| 0 | \n", "general_similarity_score_cross | \n", "LightGBM | \n", "0.946048 | \n", "0.767773 | \n", "
| 1 | \n", "general_similarity_score_cross | \n", "CatBoost | \n", "0.977951 | \n", "0.766990 | \n", "
| 2 | \n", "general_similarity_score_cross | \n", "Ensemble | \n", "0.994723 | \n", "0.766169 | \n", "
| 3 | \n", "general_similarity_score | \n", "LightGBM | \n", "0.959288 | \n", "0.759259 | \n", "
| 4 | \n", "general_similarity_score | \n", "CatBoost | \n", "0.994723 | \n", "0.766169 | \n", "
| 5 | \n", "general_similarity_score | \n", "Ensemble | \n", "0.997354 | \n", "0.795918 | \n", "
| 6 | \n", "general_similarity_score_tfidf | \n", "LightGBM | \n", "0.919512 | \n", "0.745455 | \n", "
| 7 | \n", "general_similarity_score_tfidf | \n", "CatBoost | \n", "0.976684 | \n", "0.751220 | \n", "
| 8 | \n", "general_similarity_score_tfidf | \n", "Ensemble | \n", "0.996037 | \n", "0.790244 | \n", "
| 9 | \n", "None | \n", "LightGBM | \n", "0.930864 | \n", "0.745455 | \n", "
| 10 | \n", "None | \n", "CatBoost | \n", "0.975420 | \n", "0.768519 | \n", "
| 11 | \n", "None | \n", "Ensemble | \n", "0.994723 | \n", "0.775120 | \n", "
| 12 | \n", "all | \n", "LightGBM | \n", "0.979221 | \n", "0.763285 | \n", "
| 13 | \n", "all | \n", "CatBoost | \n", "0.993412 | \n", "0.793970 | \n", "
| 14 | \n", "all | \n", "Ensemble | \n", "0.998675 | \n", "0.795812 | \n", "
| \n", " | Feature Set | \n", "Model | \n", "Train F1 | \n", "Test F1 | \n", "
|---|---|---|---|---|
| 0 | \n", "base_attributes | \n", "LightGBM | \n", "0.826754 | \n", "0.700422 | \n", "
| 1 | \n", "base_attributes | \n", "CatBoost | \n", "0.943680 | \n", "0.700935 | \n", "
| 2 | \n", "base_attributes | \n", "Ensemble | \n", "0.970399 | \n", "0.695238 | \n", "
| 3 | \n", "custom_scores | \n", "LightGBM | \n", "0.798694 | \n", "0.525424 | \n", "
| 4 | \n", "custom_scores | \n", "CatBoost | \n", "0.892308 | \n", "0.480769 | \n", "
| 5 | \n", "custom_scores | \n", "Ensemble | \n", "0.954430 | \n", "0.510638 | \n", "
| 6 | \n", "custom_scores_with_essential_base_attributes | \n", "LightGBM | \n", "0.933168 | \n", "0.767123 | \n", "
| 7 | \n", "custom_scores_with_essential_base_attributes | \n", "CatBoost | \n", "0.990802 | \n", "0.790244 | \n", "
| 8 | \n", "custom_scores_with_essential_base_attributes | \n", "Ensemble | \n", "0.996037 | \n", "0.800000 | \n", "
| 9 | \n", "base_attributes_with_essential_custom_scores | \n", "LightGBM | \n", "0.955640 | \n", "0.739336 | \n", "
| 10 | \n", "base_attributes_with_essential_custom_scores | \n", "CatBoost | \n", "0.990802 | \n", "0.788177 | \n", "
| 11 | \n", "base_attributes_with_essential_custom_scores | \n", "Ensemble | \n", "0.997354 | \n", "0.797927 | \n", "
| 12 | \n", "all | \n", "LightGBM | \n", "0.966667 | \n", "0.768519 | \n", "
| 13 | \n", "all | \n", "CatBoost | \n", "0.988204 | \n", "0.807882 | \n", "
| 14 | \n", "all | \n", "Ensemble | \n", "0.997354 | \n", "0.802030 | \n", "
| \n", " | Resampling | \n", "Model | \n", "Train F1 | \n", "Test F1 | \n", "
|---|---|---|---|---|
| 0 | \n", "Original | \n", "LightGBM | \n", "0.933168 | \n", "0.767123 | \n", "
| 1 | \n", "Original | \n", "CatBoost | \n", "0.990802 | \n", "0.790244 | \n", "
| 2 | \n", "Original | \n", "Ensemble | \n", "0.996037 | \n", "0.800000 | \n", "
| 3 | \n", "SMOTE_25% | \n", "LightGBM | \n", "0.977146 | \n", "0.792453 | \n", "
| 4 | \n", "SMOTE_25% | \n", "CatBoost | \n", "0.991850 | \n", "0.780488 | \n", "
| 5 | \n", "SMOTE_25% | \n", "Ensemble | \n", "0.999368 | \n", "0.803922 | \n", "
| 6 | \n", "SMOTE_50% | \n", "LightGBM | \n", "0.992774 | \n", "0.800000 | \n", "
| 7 | \n", "SMOTE_50% | \n", "CatBoost | \n", "0.998423 | \n", "0.763819 | \n", "
| 8 | \n", "SMOTE_50% | \n", "Ensemble | \n", "1.000000 | \n", "0.800000 | \n", "
| 9 | \n", "SMOTE_75% | \n", "LightGBM | \n", "0.995798 | \n", "0.795918 | \n", "
| 10 | \n", "SMOTE_75% | \n", "CatBoost | \n", "0.999369 | \n", "0.791444 | \n", "
| 11 | \n", "SMOTE_75% | \n", "Ensemble | \n", "1.000000 | \n", "0.779487 | \n", "
| 12 | \n", "SMOTE_100% | \n", "LightGBM | \n", "0.996687 | \n", "0.781250 | \n", "
| 13 | \n", "SMOTE_100% | \n", "CatBoost | \n", "0.999211 | \n", "0.774194 | \n", "
| 14 | \n", "SMOTE_100% | \n", "Ensemble | \n", "1.000000 | \n", "0.783505 | \n", "
| 15 | \n", "ADASYN_25% | \n", "LightGBM | \n", "0.970149 | \n", "0.764151 | \n", "
| 16 | \n", "ADASYN_25% | \n", "CatBoost | \n", "0.990305 | \n", "0.780488 | \n", "
| 17 | \n", "ADASYN_25% | \n", "Ensemble | \n", "0.997906 | \n", "0.794118 | \n", "
| 18 | \n", "ADASYN_50% | \n", "LightGBM | \n", "0.991558 | \n", "0.790244 | \n", "
| 19 | \n", "ADASYN_50% | \n", "CatBoost | \n", "0.997387 | \n", "0.785714 | \n", "
| 20 | \n", "ADASYN_50% | \n", "Ensemble | \n", "0.999673 | \n", "0.796117 | \n", "
| 21 | \n", "ADASYN_75% | \n", "LightGBM | \n", "0.996147 | \n", "0.756477 | \n", "
| 22 | \n", "ADASYN_75% | \n", "CatBoost | \n", "0.999142 | \n", "0.771574 | \n", "
| 23 | \n", "ADASYN_75% | \n", "Ensemble | \n", "1.000000 | \n", "0.778325 | \n", "
| 24 | \n", "ADASYN_100% | \n", "LightGBM | \n", "0.996505 | \n", "0.779487 | \n", "
| 25 | \n", "ADASYN_100% | \n", "CatBoost | \n", "0.998727 | \n", "0.747368 | \n", "
| 26 | \n", "ADASYN_100% | \n", "Ensemble | \n", "1.000000 | \n", "0.781726 | \n", "
| 27 | \n", "BorderlineSMOTE_25% | \n", "LightGBM | \n", "0.983219 | \n", "0.813084 | \n", "
| 28 | \n", "BorderlineSMOTE_25% | \n", "CatBoost | \n", "0.993719 | \n", "0.796117 | \n", "
| 29 | \n", "BorderlineSMOTE_25% | \n", "Ensemble | \n", "0.999368 | \n", "0.811881 | \n", "
| 30 | \n", "BorderlineSMOTE_50% | \n", "LightGBM | \n", "0.992453 | \n", "0.803828 | \n", "
| 31 | \n", "BorderlineSMOTE_50% | \n", "CatBoost | \n", "0.998422 | \n", "0.806122 | \n", "
| 32 | \n", "BorderlineSMOTE_50% | \n", "Ensemble | \n", "1.000000 | \n", "0.807882 | \n", "
| 33 | \n", "BorderlineSMOTE_75% | \n", "LightGBM | \n", "0.996845 | \n", "0.810256 | \n", "
| 34 | \n", "BorderlineSMOTE_75% | \n", "CatBoost | \n", "0.999579 | \n", "0.802083 | \n", "
| 35 | \n", "BorderlineSMOTE_75% | \n", "Ensemble | \n", "1.000000 | \n", "0.804020 | \n", "
| 36 | \n", "BorderlineSMOTE_100% | \n", "LightGBM | \n", "0.997789 | \n", "0.776596 | \n", "
| 37 | \n", "BorderlineSMOTE_100% | \n", "CatBoost | \n", "0.998736 | \n", "0.753927 | \n", "
| 38 | \n", "BorderlineSMOTE_100% | \n", "Ensemble | \n", "1.000000 | \n", "0.797927 | \n", "
| 39 | \n", "SVMSMOTE_25% | \n", "LightGBM | \n", "0.983219 | \n", "0.786730 | \n", "
| 40 | \n", "SVMSMOTE_25% | \n", "CatBoost | \n", "0.994343 | \n", "0.794118 | \n", "
| 41 | \n", "SVMSMOTE_25% | \n", "Ensemble | \n", "0.998737 | \n", "0.817734 | \n", "
| 42 | \n", "SVMSMOTE_50% | \n", "LightGBM | \n", "0.993390 | \n", "0.793814 | \n", "
| 43 | \n", "SVMSMOTE_50% | \n", "CatBoost | \n", "0.998107 | \n", "0.791878 | \n", "
| 44 | \n", "SVMSMOTE_50% | \n", "Ensemble | \n", "0.999684 | \n", "0.816327 | \n", "
| 45 | \n", "SVMSMOTE_75% | \n", "LightGBM | \n", "0.994747 | \n", "0.822335 | \n", "
| 46 | \n", "SVMSMOTE_75% | \n", "CatBoost | \n", "0.998527 | \n", "0.800000 | \n", "
| 47 | \n", "SVMSMOTE_75% | \n", "Ensemble | \n", "1.000000 | \n", "0.824121 | \n", "
| 48 | \n", "SVMSMOTE_100% | \n", "LightGBM | \n", "0.996845 | \n", "0.816754 | \n", "
| 49 | \n", "SVMSMOTE_100% | \n", "CatBoost | \n", "0.998895 | \n", "0.774869 | \n", "
| 50 | \n", "SVMSMOTE_100% | \n", "Ensemble | \n", "1.000000 | \n", "0.824742 | \n", "
| 51 | \n", "Downsampling | \n", "LightGBM | \n", "1.000000 | \n", "0.644444 | \n", "
| 52 | \n", "Downsampling | \n", "CatBoost | \n", "1.000000 | \n", "0.644928 | \n", "
| 53 | \n", "Downsampling | \n", "Ensemble | \n", "1.000000 | \n", "0.626335 | \n", "
| \n", " | Model | \n", "Attribute | \n", "Demographic Parity Diff | \n", "Equalized Odds Diff | \n", "
|---|---|---|---|---|
| 0 | \n", "CatBoost | \n", "Sex_int | \n", "0.023078 | \n", "0.049602 | \n", "
| 1 | \n", "CatBoost | \n", "Protected category | \n", "0.232991 | \n", "0.027778 | \n", "
| 2 | \n", "CatBoost | \n", "Age Range_int | \n", "0.108576 | \n", "0.600000 | \n", "
| 3 | \n", "CatBoost | \n", "Italian Residence | \n", "0.108333 | \n", "0.784946 | \n", "
| 4 | \n", "CatBoost | \n", "European Residence | \n", "0.103763 | \n", "0.776596 | \n", "
| 5 | \n", "LightGBM | \n", "Sex_int | \n", "0.022280 | \n", "0.091855 | \n", "
| 6 | \n", "LightGBM | \n", "Protected category | \n", "0.227290 | \n", "0.061111 | \n", "
| 7 | \n", "LightGBM | \n", "Age Range_int | \n", "0.100511 | \n", "0.400000 | \n", "
| 8 | \n", "LightGBM | \n", "Italian Residence | \n", "0.091356 | \n", "0.817204 | \n", "
| 9 | \n", "LightGBM | \n", "European Residence | \n", "0.002787 | \n", "0.808511 | \n", "
| 10 | \n", "Ensemble | \n", "Sex_int | \n", "0.038811 | \n", "0.033279 | \n", "
| 11 | \n", "Ensemble | \n", "Protected category | \n", "0.225010 | \n", "0.083333 | \n", "
| 12 | \n", "Ensemble | \n", "Age Range_int | \n", "0.124705 | \n", "0.400000 | \n", "
| 13 | \n", "Ensemble | \n", "Italian Residence | \n", "0.116667 | \n", "0.838710 | \n", "
| 14 | \n", "Ensemble | \n", "European Residence | \n", "0.111745 | \n", "0.829787 | \n", "
| \n", " | Model | \n", "Precision | \n", "Recall | \n", "
|---|---|---|---|
| 0 | \n", "CatBoost | \n", "0.802198 | \n", "0.776596 | \n", "
| 1 | \n", "LightGBM | \n", "0.791667 | \n", "0.808511 | \n", "
| 2 | \n", "Ensemble | \n", "0.795918 | \n", "0.829787 | \n", "
| \n", " | Setting | \n", "Model | \n", "Precision | \n", "Recall | \n", "F1 Score | \n", "
|---|---|---|---|---|---|
| 0 | \n", "With Protected | \n", "CatBoost | \n", "0.806122 | \n", "0.840426 | \n", "0.822917 | \n", "
| 1 | \n", "With Protected | \n", "LightGBM | \n", "0.780000 | \n", "0.829787 | \n", "0.804124 | \n", "
| 2 | \n", "With Protected | \n", "Ensemble | \n", "0.784314 | \n", "0.851064 | \n", "0.816327 | \n", "
| 3 | \n", "Without Protected | \n", "CatBoost | \n", "0.780000 | \n", "0.829787 | \n", "0.804124 | \n", "
| 4 | \n", "Without Protected | \n", "LightGBM | \n", "0.780000 | \n", "0.829787 | \n", "0.804124 | \n", "
| 5 | \n", "Without Protected | \n", "Ensemble | \n", "0.785714 | \n", "0.819149 | \n", "0.802083 | \n", "
| \n", " | Setting | \n", "Model | \n", "Attribute | \n", "Metric | \n", "Score | \n", "
|---|---|---|---|---|---|
| 0 | \n", "With Protected | \n", "CatBoost | \n", "Sex_int | \n", "Demographic Parity Diff | \n", "0.057537 | \n", "
| 1 | \n", "With Protected | \n", "CatBoost | \n", "Sex_int | \n", "Equalized Odds Diff | \n", "0.035931 | \n", "
| 2 | \n", "With Protected | \n", "CatBoost | \n", "Protected category | \n", "Demographic Parity Diff | \n", "0.111872 | \n", "
| 3 | \n", "With Protected | \n", "CatBoost | \n", "Protected category | \n", "Equalized Odds Diff | \n", "0.849462 | \n", "
| 4 | \n", "With Protected | \n", "CatBoost | \n", "Age Range_int | \n", "Demographic Parity Diff | \n", "0.167831 | \n", "
| 5 | \n", "With Protected | \n", "CatBoost | \n", "Age Range_int | \n", "Equalized Odds Diff | \n", "0.333333 | \n", "
| 6 | \n", "With Protected | \n", "CatBoost | \n", "Italian Residence | \n", "Demographic Parity Diff | \n", "0.098545 | \n", "
| 7 | \n", "With Protected | \n", "CatBoost | \n", "Italian Residence | \n", "Equalized Odds Diff | \n", "0.347826 | \n", "
| 8 | \n", "With Protected | \n", "CatBoost | \n", "European Residence | \n", "Demographic Parity Diff | \n", "0.113295 | \n", "
| 9 | \n", "With Protected | \n", "CatBoost | \n", "European Residence | \n", "Equalized Odds Diff | \n", "0.840426 | \n", "
| 10 | \n", "With Protected | \n", "LightGBM | \n", "Sex_int | \n", "Demographic Parity Diff | \n", "0.069165 | \n", "
| 11 | \n", "With Protected | \n", "LightGBM | \n", "Sex_int | \n", "Equalized Odds Diff | \n", "0.030956 | \n", "
| 12 | \n", "With Protected | \n", "LightGBM | \n", "Protected category | \n", "Demographic Parity Diff | \n", "0.013014 | \n", "
| 13 | \n", "With Protected | \n", "LightGBM | \n", "Protected category | \n", "Equalized Odds Diff | \n", "0.172043 | \n", "
| 14 | \n", "With Protected | \n", "LightGBM | \n", "Age Range_int | \n", "Demographic Parity Diff | \n", "0.156066 | \n", "
| 15 | \n", "With Protected | \n", "LightGBM | \n", "Age Range_int | \n", "Equalized Odds Diff | \n", "0.250000 | \n", "
| 16 | \n", "With Protected | \n", "LightGBM | \n", "Italian Residence | \n", "Demographic Parity Diff | \n", "0.081567 | \n", "
| 17 | \n", "With Protected | \n", "LightGBM | \n", "Italian Residence | \n", "Equalized Odds Diff | \n", "0.173913 | \n", "
| 18 | \n", "With Protected | \n", "LightGBM | \n", "European Residence | \n", "Demographic Parity Diff | \n", "0.115607 | \n", "
| 19 | \n", "With Protected | \n", "LightGBM | \n", "European Residence | \n", "Equalized Odds Diff | \n", "0.829787 | \n", "
| 20 | \n", "With Protected | \n", "Ensemble | \n", "Sex_int | \n", "Demographic Parity Diff | \n", "0.066364 | \n", "
| 21 | \n", "With Protected | \n", "Ensemble | \n", "Sex_int | \n", "Equalized Odds Diff | \n", "0.016650 | \n", "
| 22 | \n", "With Protected | \n", "Ensemble | \n", "Protected category | \n", "Demographic Parity Diff | \n", "0.015297 | \n", "
| 23 | \n", "With Protected | \n", "Ensemble | \n", "Protected category | \n", "Equalized Odds Diff | \n", "0.150538 | \n", "
| 24 | \n", "With Protected | \n", "Ensemble | \n", "Age Range_int | \n", "Demographic Parity Diff | \n", "0.140441 | \n", "
| 25 | \n", "With Protected | \n", "Ensemble | \n", "Age Range_int | \n", "Equalized Odds Diff | \n", "0.250000 | \n", "
| 26 | \n", "With Protected | \n", "Ensemble | \n", "Italian Residence | \n", "Demographic Parity Diff | \n", "0.103358 | \n", "
| 27 | \n", "With Protected | \n", "Ensemble | \n", "Italian Residence | \n", "Equalized Odds Diff | \n", "0.358696 | \n", "
| 28 | \n", "With Protected | \n", "Ensemble | \n", "European Residence | \n", "Demographic Parity Diff | \n", "0.117919 | \n", "
| 29 | \n", "With Protected | \n", "Ensemble | \n", "European Residence | \n", "Equalized Odds Diff | \n", "0.851064 | \n", "
| 30 | \n", "Without Protected | \n", "CatBoost | \n", "Sex_int | \n", "Demographic Parity Diff | \n", "0.054736 | \n", "
| 31 | \n", "Without Protected | \n", "CatBoost | \n", "Sex_int | \n", "Equalized Odds Diff | \n", "0.072968 | \n", "
| 32 | \n", "Without Protected | \n", "CatBoost | \n", "Protected category | \n", "Demographic Parity Diff | \n", "0.114155 | \n", "
| 33 | \n", "Without Protected | \n", "CatBoost | \n", "Protected category | \n", "Equalized Odds Diff | \n", "0.838710 | \n", "
| 34 | \n", "Without Protected | \n", "CatBoost | \n", "Age Range_int | \n", "Demographic Parity Diff | \n", "0.134559 | \n", "
| 35 | \n", "Without Protected | \n", "CatBoost | \n", "Age Range_int | \n", "Equalized Odds Diff | \n", "0.150000 | \n", "
| 36 | \n", "Without Protected | \n", "CatBoost | \n", "Italian Residence | \n", "Demographic Parity Diff | \n", "0.100952 | \n", "
| 37 | \n", "Without Protected | \n", "CatBoost | \n", "Italian Residence | \n", "Equalized Odds Diff | \n", "0.336957 | \n", "
| 38 | \n", "Without Protected | \n", "CatBoost | \n", "European Residence | \n", "Demographic Parity Diff | \n", "0.115607 | \n", "
| 39 | \n", "Without Protected | \n", "CatBoost | \n", "European Residence | \n", "Equalized Odds Diff | \n", "0.829787 | \n", "
| 40 | \n", "Without Protected | \n", "LightGBM | \n", "Sex_int | \n", "Demographic Parity Diff | \n", "0.061950 | \n", "
| 41 | \n", "Without Protected | \n", "LightGBM | \n", "Sex_int | \n", "Equalized Odds Diff | \n", "0.030956 | \n", "
| 42 | \n", "Without Protected | \n", "LightGBM | \n", "Protected category | \n", "Demographic Parity Diff | \n", "0.013014 | \n", "
| 43 | \n", "Without Protected | \n", "LightGBM | \n", "Protected category | \n", "Equalized Odds Diff | \n", "0.172043 | \n", "
| 44 | \n", "Without Protected | \n", "LightGBM | \n", "Age Range_int | \n", "Demographic Parity Diff | \n", "0.134559 | \n", "
| 45 | \n", "Without Protected | \n", "LightGBM | \n", "Age Range_int | \n", "Equalized Odds Diff | \n", "0.333333 | \n", "
| 46 | \n", "Without Protected | \n", "LightGBM | \n", "Italian Residence | \n", "Demographic Parity Diff | \n", "0.081567 | \n", "
| 47 | \n", "Without Protected | \n", "LightGBM | \n", "Italian Residence | \n", "Equalized Odds Diff | \n", "0.173913 | \n", "
| 48 | \n", "Without Protected | \n", "LightGBM | \n", "European Residence | \n", "Demographic Parity Diff | \n", "0.115607 | \n", "
| 49 | \n", "Without Protected | \n", "LightGBM | \n", "European Residence | \n", "Equalized Odds Diff | \n", "0.829787 | \n", "
| 50 | \n", "Without Protected | \n", "Ensemble | \n", "Sex_int | \n", "Demographic Parity Diff | \n", "0.057537 | \n", "
| 51 | \n", "Without Protected | \n", "Ensemble | \n", "Sex_int | \n", "Equalized Odds Diff | \n", "0.058043 | \n", "
| 52 | \n", "Without Protected | \n", "Ensemble | \n", "Protected category | \n", "Demographic Parity Diff | \n", "0.010731 | \n", "
| 53 | \n", "Without Protected | \n", "Ensemble | \n", "Protected category | \n", "Equalized Odds Diff | \n", "0.182796 | \n", "
| 54 | \n", "Without Protected | \n", "Ensemble | \n", "Age Range_int | \n", "Demographic Parity Diff | \n", "0.140441 | \n", "
| 55 | \n", "Without Protected | \n", "Ensemble | \n", "Age Range_int | \n", "Equalized Odds Diff | \n", "0.138158 | \n", "
| 56 | \n", "Without Protected | \n", "Ensemble | \n", "Italian Residence | \n", "Demographic Parity Diff | \n", "0.098545 | \n", "
| 57 | \n", "Without Protected | \n", "Ensemble | \n", "Italian Residence | \n", "Equalized Odds Diff | \n", "0.326087 | \n", "
| 58 | \n", "Without Protected | \n", "Ensemble | \n", "European Residence | \n", "Demographic Parity Diff | \n", "0.113295 | \n", "
| 59 | \n", "Without Protected | \n", "Ensemble | \n", "European Residence | \n", "Equalized Odds Diff | \n", "0.819149 | \n", "
| \n", " | Setting | \n", "Model | \n", "Attribute | \n", "Group | \n", "Precision | \n", "Recall | \n", "
|---|---|---|---|---|---|---|
| 0 | \n", "With Protected | \n", "CatBoost | \n", "Sex_int | \n", "0 | \n", "0.814815 | \n", "0.814815 | \n", "
| 1 | \n", "With Protected | \n", "CatBoost | \n", "Sex_int | \n", "1 | \n", "0.802817 | \n", "0.850746 | \n", "
| 2 | \n", "With Protected | \n", "CatBoost | \n", "Protected category | \n", "0 | \n", "0.806122 | \n", "0.849462 | \n", "
| 3 | \n", "With Protected | \n", "CatBoost | \n", "Protected category | \n", "1 | \n", "0.000000 | \n", "0.000000 | \n", "
| 4 | \n", "With Protected | \n", "CatBoost | \n", "Age Range_int | \n", "0 | \n", "0.833333 | \n", "0.833333 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 85 | \n", "Without Protected | \n", "Ensemble | \n", "Age Range_int | \n", "6 | \n", "1.000000 | \n", "0.833333 | \n", "
| 86 | \n", "Without Protected | \n", "Ensemble | \n", "Italian Residence | \n", "0 | \n", "1.000000 | \n", "0.500000 | \n", "
| 87 | \n", "Without Protected | \n", "Ensemble | \n", "Italian Residence | \n", "1 | \n", "0.783505 | \n", "0.826087 | \n", "
| 88 | \n", "Without Protected | \n", "Ensemble | \n", "European Residence | \n", "0 | \n", "0.000000 | \n", "0.000000 | \n", "
| 89 | \n", "Without Protected | \n", "Ensemble | \n", "European Residence | \n", "1 | \n", "0.785714 | \n", "0.819149 | \n", "
90 rows × 6 columns
\n", "