Last active
April 8, 2025 08:18
-
-
Save hdary85/967af97edf124b07cb7a71f20a9f132f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
| import pandas as pd | |
| # On suppose que df_merged est déjà chargé avec toutes les colonnes nécessaires. | |
| # Pour cet exemple, nous utilisons la colonne 'PARTY_TYPE_CODE' qui doit contenir | |
| # 1 pour les particuliers et 2 pour les entreprises. | |
| # Initialisation de la colonne de score total | |
| df_merged['risk_score_total'] = 0 | |
| # ----------------------- | |
| # 1. TV_COUNT | |
| # Particuliers : ≤2 = 5, 3-10 = 10, >10 = 20 | |
| # Entreprises : ≤2 = 3, 3-10 = 7, >10 = 15 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_COUNT'] <= 2, | |
| df_merged['TV_COUNT'].between(3, 10), | |
| df_merged['TV_COUNT'] > 10], | |
| [5, 10, 20], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_COUNT'] <= 2, | |
| df_merged['TV_COUNT'].between(3, 10), | |
| df_merged['TV_COUNT'] > 10], | |
| [3, 7, 15], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 2. TV_TOTAL_AMOUNT | |
| # Particuliers : ≤10k = 5, 10k-50k = 15, >50k = 30 | |
| # Entreprises : ≤10k = 3, 10k-50k = 10, >50k = 20 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_TOTAL_AMOUNT'] <= 10000, | |
| df_merged['TV_TOTAL_AMOUNT'].between(10001, 50000), | |
| df_merged['TV_TOTAL_AMOUNT'] > 50000], | |
| [5, 15, 30], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_TOTAL_AMOUNT'] <= 10000, | |
| df_merged['TV_TOTAL_AMOUNT'].between(10001, 50000), | |
| df_merged['TV_TOTAL_AMOUNT'] > 50000], | |
| [3, 10, 20], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 3. TV_AVG_AMOUNT | |
| # Particuliers : ≤2k = 5, 2k-10k = 10, >10k = 20 | |
| # Entreprises : ≤2k = 3, 2k-10k = 7, >10k = 15 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_AVG_AMOUNT'] <= 2000, | |
| df_merged['TV_AVG_AMOUNT'].between(2001, 10000), | |
| df_merged['TV_AVG_AMOUNT'] > 10000], | |
| [5, 10, 20], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_AVG_AMOUNT'] <= 2000, | |
| df_merged['TV_AVG_AMOUNT'].between(2001, 10000), | |
| df_merged['TV_AVG_AMOUNT'] > 10000], | |
| [3, 7, 15], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 4. TV_RISQUE_SECTEUR | |
| # Particuliers : 1 -> 5, 2 -> 15, 3 -> 30 | |
| # Entreprises : 1 -> 2, 2 -> 10, 3 -> 20 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| df_merged['TV_RISQUE_SECTEUR'].map({1: 5, 2: 15, 3: 30}), | |
| df_merged['TV_RISQUE_SECTEUR'].map({1: 2, 2: 10, 3: 20}) | |
| ) | |
| # ----------------------- | |
| # 5. TV_ORIGINATOR_COUNT | |
| # Particuliers : 0 -> 0, 1-3 -> 10, >3 -> 20 | |
| # Entreprises : 0 -> 0, 1-3 -> 5, >3 -> 10 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_ORIGINATOR_COUNT'] == 0, | |
| df_merged['TV_ORIGINATOR_COUNT'].between(1, 3), | |
| df_merged['TV_ORIGINATOR_COUNT'] > 3], | |
| [0, 10, 20], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_ORIGINATOR_COUNT'] == 0, | |
| df_merged['TV_ORIGINATOR_COUNT'].between(1, 3), | |
| df_merged['TV_ORIGINATOR_COUNT'] > 3], | |
| [0, 5, 10], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 6. TV_BENEFICIARY_COUNT | |
| # Particuliers : 0 -> 0, 1-3 -> 10, >3 -> 20 | |
| # Entreprises : 0 -> 0, 1-3 -> 5, >3 -> 10 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_BENEFICIARY_COUNT'] == 0, | |
| df_merged['TV_BENEFICIARY_COUNT'].between(1, 3), | |
| df_merged['TV_BENEFICIARY_COUNT'] > 3], | |
| [0, 10, 20], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_BENEFICIARY_COUNT'] == 0, | |
| df_merged['TV_BENEFICIARY_COUNT'].between(1, 3), | |
| df_merged['TV_BENEFICIARY_COUNT'] > 3], | |
| [0, 5, 10], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 7. TV_ORIGINATOR_SUM | |
| # Particuliers : ≤10k = 5, 10k-50k = 15, >50k = 30 | |
| # Entreprises : ≤10k = 3, 10k-50k = 10, >50k = 20 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_ORIGINATOR_SUM'] <= 10000, | |
| df_merged['TV_ORIGINATOR_SUM'].between(10001, 50000), | |
| df_merged['TV_ORIGINATOR_SUM'] > 50000], | |
| [5, 15, 30], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_ORIGINATOR_SUM'] <= 10000, | |
| df_merged['TV_ORIGINATOR_SUM'].between(10001, 50000), | |
| df_merged['TV_ORIGINATOR_SUM'] > 50000], | |
| [3, 10, 20], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 8. TV_BENEFICIARY_SUM | |
| # Particuliers : ≤10k = 5, 10k-50k = 15, >50k = 30 | |
| # Entreprises : ≤10k = 3, 10k-50k = 10, >50k = 20 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['TV_BENEFICIARY_SUM'] <= 10000, | |
| df_merged['TV_BENEFICIARY_SUM'].between(10001, 50000), | |
| df_merged['TV_BENEFICIARY_SUM'] > 50000], | |
| [5, 15, 30], default=0 | |
| ), | |
| np.select( | |
| [df_merged['TV_BENEFICIARY_SUM'] <= 10000, | |
| df_merged['TV_BENEFICIARY_SUM'].between(10001, 50000), | |
| df_merged['TV_BENEFICIARY_SUM'] > 50000], | |
| [3, 10, 20], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 9. ESPECES_NBR_TRX | |
| # Particuliers : 0 -> 0, 1-5 -> 10, >5 -> 25 | |
| # Entreprises : 0 -> 0, 1-5 -> 5, >5 -> 15 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['ESPECES_NBR_TRX'] == 0, | |
| df_merged['ESPECES_NBR_TRX'].between(1, 5), | |
| df_merged['ESPECES_NBR_TRX'] > 5], | |
| [0, 10, 25], default=0 | |
| ), | |
| np.select( | |
| [df_merged['ESPECES_NBR_TRX'] == 0, | |
| df_merged['ESPECES_NBR_TRX'].between(1, 5), | |
| df_merged['ESPECES_NBR_TRX'] > 5], | |
| [0, 5, 15], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 10. ESPECES_TOTAL | |
| # Particuliers : ≤5k = 5, 5k-20k = 20, >20k = 40 | |
| # Entreprises : ≤5k = 5, 5k-20k = 10, >20k = 20 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['ESPECES_TOTAL'] <= 5000, | |
| df_merged['ESPECES_TOTAL'].between(5001, 20000), | |
| df_merged['ESPECES_TOTAL'] > 20000], | |
| [5, 20, 40], default=0 | |
| ), | |
| np.select( | |
| [df_merged['ESPECES_TOTAL'] <= 5000, | |
| df_merged['ESPECES_TOTAL'].between(5001, 20000), | |
| df_merged['ESPECES_TOTAL'] > 20000], | |
| [5, 10, 20], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 11. RATIO_ESPECES_TO_TV | |
| # Particuliers : ≤0.1 = 5, 0.11-0.5 = 15, >0.5 = 30 | |
| # Entreprises : ≤0.1 = 3, 0.11-0.5 = 10, >0.5 = 20 | |
| df_merged['risk_score_total'] += np.where( | |
| df_merged['PARTY_TYPE_CODE'] == 1, | |
| np.select( | |
| [df_merged['RATIO_ESPECES_TO_TV'] <= 0.1, | |
| df_merged['RATIO_ESPECES_TO_TV'].between(0.11, 0.5), | |
| df_merged['RATIO_ESPECES_TO_TV'] > 0.5], | |
| [5, 15, 30], default=0 | |
| ), | |
| np.select( | |
| [df_merged['RATIO_ESPECES_TO_TV'] <= 0.1, | |
| df_merged['RATIO_ESPECES_TO_TV'].between(0.11, 0.5), | |
| df_merged['RATIO_ESPECES_TO_TV'] > 0.5], | |
| [3, 10, 20], default=0 | |
| ) | |
| ) | |
| # ----------------------- | |
| # 12. Type de client (optionnel) | |
| # Ici, on pourrait ajouter un supplément en fonction du type, mais comme nous avons déjà différencié | |
| # dans chaque variable, ceci est facultatif. Par exemple, on pourrait ajouter 0 pour entreprise et 5 pour particulier. | |
| df_merged['risk_score_total'] += np.where(df_merged['PARTY_TYPE_CODE'] == 1, 5, 0) | |
| # Affichage des 10 clients avec les scores les plus élevés | |
| print(df_merged[['PARTY_KEY', 'risk_score_total']].sort_values(by='risk_score_total', ascending=False).head(10)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment