Skip to content

Instantly share code, notes, and snippets.

@hdary85
Last active April 8, 2025 08:18
Show Gist options
  • Select an option

  • Save hdary85/967af97edf124b07cb7a71f20a9f132f to your computer and use it in GitHub Desktop.

Select an option

Save hdary85/967af97edf124b07cb7a71f20a9f132f to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
# On suppose que df_merged est déjà chargé avec toutes les colonnes nécessaires.
# Pour cet exemple, nous utilisons la colonne 'PARTY_TYPE_CODE' qui doit contenir
# 1 pour les particuliers et 2 pour les entreprises.
# Initialisation de la colonne de score total
df_merged['risk_score_total'] = 0
# -----------------------
# 1. TV_COUNT
# Particuliers : ≤2 = 5, 3-10 = 10, >10 = 20
# Entreprises : ≤2 = 3, 3-10 = 7, >10 = 15
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_COUNT'] <= 2,
df_merged['TV_COUNT'].between(3, 10),
df_merged['TV_COUNT'] > 10],
[5, 10, 20], default=0
),
np.select(
[df_merged['TV_COUNT'] <= 2,
df_merged['TV_COUNT'].between(3, 10),
df_merged['TV_COUNT'] > 10],
[3, 7, 15], default=0
)
)
# -----------------------
# 2. TV_TOTAL_AMOUNT
# Particuliers : ≤10k = 5, 10k-50k = 15, >50k = 30
# Entreprises : ≤10k = 3, 10k-50k = 10, >50k = 20
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_TOTAL_AMOUNT'] <= 10000,
df_merged['TV_TOTAL_AMOUNT'].between(10001, 50000),
df_merged['TV_TOTAL_AMOUNT'] > 50000],
[5, 15, 30], default=0
),
np.select(
[df_merged['TV_TOTAL_AMOUNT'] <= 10000,
df_merged['TV_TOTAL_AMOUNT'].between(10001, 50000),
df_merged['TV_TOTAL_AMOUNT'] > 50000],
[3, 10, 20], default=0
)
)
# -----------------------
# 3. TV_AVG_AMOUNT
# Particuliers : ≤2k = 5, 2k-10k = 10, >10k = 20
# Entreprises : ≤2k = 3, 2k-10k = 7, >10k = 15
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_AVG_AMOUNT'] <= 2000,
df_merged['TV_AVG_AMOUNT'].between(2001, 10000),
df_merged['TV_AVG_AMOUNT'] > 10000],
[5, 10, 20], default=0
),
np.select(
[df_merged['TV_AVG_AMOUNT'] <= 2000,
df_merged['TV_AVG_AMOUNT'].between(2001, 10000),
df_merged['TV_AVG_AMOUNT'] > 10000],
[3, 7, 15], default=0
)
)
# -----------------------
# 4. TV_RISQUE_SECTEUR
# Particuliers : 1 -> 5, 2 -> 15, 3 -> 30
# Entreprises : 1 -> 2, 2 -> 10, 3 -> 20
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
df_merged['TV_RISQUE_SECTEUR'].map({1: 5, 2: 15, 3: 30}),
df_merged['TV_RISQUE_SECTEUR'].map({1: 2, 2: 10, 3: 20})
)
# -----------------------
# 5. TV_ORIGINATOR_COUNT
# Particuliers : 0 -> 0, 1-3 -> 10, >3 -> 20
# Entreprises : 0 -> 0, 1-3 -> 5, >3 -> 10
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_ORIGINATOR_COUNT'] == 0,
df_merged['TV_ORIGINATOR_COUNT'].between(1, 3),
df_merged['TV_ORIGINATOR_COUNT'] > 3],
[0, 10, 20], default=0
),
np.select(
[df_merged['TV_ORIGINATOR_COUNT'] == 0,
df_merged['TV_ORIGINATOR_COUNT'].between(1, 3),
df_merged['TV_ORIGINATOR_COUNT'] > 3],
[0, 5, 10], default=0
)
)
# -----------------------
# 6. TV_BENEFICIARY_COUNT
# Particuliers : 0 -> 0, 1-3 -> 10, >3 -> 20
# Entreprises : 0 -> 0, 1-3 -> 5, >3 -> 10
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_BENEFICIARY_COUNT'] == 0,
df_merged['TV_BENEFICIARY_COUNT'].between(1, 3),
df_merged['TV_BENEFICIARY_COUNT'] > 3],
[0, 10, 20], default=0
),
np.select(
[df_merged['TV_BENEFICIARY_COUNT'] == 0,
df_merged['TV_BENEFICIARY_COUNT'].between(1, 3),
df_merged['TV_BENEFICIARY_COUNT'] > 3],
[0, 5, 10], default=0
)
)
# -----------------------
# 7. TV_ORIGINATOR_SUM
# Particuliers : ≤10k = 5, 10k-50k = 15, >50k = 30
# Entreprises : ≤10k = 3, 10k-50k = 10, >50k = 20
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_ORIGINATOR_SUM'] <= 10000,
df_merged['TV_ORIGINATOR_SUM'].between(10001, 50000),
df_merged['TV_ORIGINATOR_SUM'] > 50000],
[5, 15, 30], default=0
),
np.select(
[df_merged['TV_ORIGINATOR_SUM'] <= 10000,
df_merged['TV_ORIGINATOR_SUM'].between(10001, 50000),
df_merged['TV_ORIGINATOR_SUM'] > 50000],
[3, 10, 20], default=0
)
)
# -----------------------
# 8. TV_BENEFICIARY_SUM
# Particuliers : ≤10k = 5, 10k-50k = 15, >50k = 30
# Entreprises : ≤10k = 3, 10k-50k = 10, >50k = 20
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['TV_BENEFICIARY_SUM'] <= 10000,
df_merged['TV_BENEFICIARY_SUM'].between(10001, 50000),
df_merged['TV_BENEFICIARY_SUM'] > 50000],
[5, 15, 30], default=0
),
np.select(
[df_merged['TV_BENEFICIARY_SUM'] <= 10000,
df_merged['TV_BENEFICIARY_SUM'].between(10001, 50000),
df_merged['TV_BENEFICIARY_SUM'] > 50000],
[3, 10, 20], default=0
)
)
# -----------------------
# 9. ESPECES_NBR_TRX
# Particuliers : 0 -> 0, 1-5 -> 10, >5 -> 25
# Entreprises : 0 -> 0, 1-5 -> 5, >5 -> 15
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['ESPECES_NBR_TRX'] == 0,
df_merged['ESPECES_NBR_TRX'].between(1, 5),
df_merged['ESPECES_NBR_TRX'] > 5],
[0, 10, 25], default=0
),
np.select(
[df_merged['ESPECES_NBR_TRX'] == 0,
df_merged['ESPECES_NBR_TRX'].between(1, 5),
df_merged['ESPECES_NBR_TRX'] > 5],
[0, 5, 15], default=0
)
)
# -----------------------
# 10. ESPECES_TOTAL
# Particuliers : ≤5k = 5, 5k-20k = 20, >20k = 40
# Entreprises : ≤5k = 5, 5k-20k = 10, >20k = 20
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['ESPECES_TOTAL'] <= 5000,
df_merged['ESPECES_TOTAL'].between(5001, 20000),
df_merged['ESPECES_TOTAL'] > 20000],
[5, 20, 40], default=0
),
np.select(
[df_merged['ESPECES_TOTAL'] <= 5000,
df_merged['ESPECES_TOTAL'].between(5001, 20000),
df_merged['ESPECES_TOTAL'] > 20000],
[5, 10, 20], default=0
)
)
# -----------------------
# 11. RATIO_ESPECES_TO_TV
# Particuliers : ≤0.1 = 5, 0.11-0.5 = 15, >0.5 = 30
# Entreprises : ≤0.1 = 3, 0.11-0.5 = 10, >0.5 = 20
df_merged['risk_score_total'] += np.where(
df_merged['PARTY_TYPE_CODE'] == 1,
np.select(
[df_merged['RATIO_ESPECES_TO_TV'] <= 0.1,
df_merged['RATIO_ESPECES_TO_TV'].between(0.11, 0.5),
df_merged['RATIO_ESPECES_TO_TV'] > 0.5],
[5, 15, 30], default=0
),
np.select(
[df_merged['RATIO_ESPECES_TO_TV'] <= 0.1,
df_merged['RATIO_ESPECES_TO_TV'].between(0.11, 0.5),
df_merged['RATIO_ESPECES_TO_TV'] > 0.5],
[3, 10, 20], default=0
)
)
# -----------------------
# 12. Type de client (optionnel)
# Ici, on pourrait ajouter un supplément en fonction du type, mais comme nous avons déjà différencié
# dans chaque variable, ceci est facultatif. Par exemple, on pourrait ajouter 0 pour entreprise et 5 pour particulier.
df_merged['risk_score_total'] += np.where(df_merged['PARTY_TYPE_CODE'] == 1, 5, 0)
# Affichage des 10 clients avec les scores les plus élevés
print(df_merged[['PARTY_KEY', 'risk_score_total']].sort_values(by='risk_score_total', ascending=False).head(10))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment