#Bibliotecas necessárias
import pandas as pd
import numpy as np

Importando a base de dados utilizada no exemplo.

# Load the example dataset from the workbook's 'BASE DE DADOS' sheet.
df = pd.read_excel(
    io="CasoUso_02_Emprestimo_Bancario.xlsx",
    sheet_name='BASE DE DADOS',
)
# Preview the first rows (shown as cell output when run in a notebook).
df.head()

Definindo a função que será utilizada para calcular o IV (Information Value) e o WoE (Weight of Evidence) ¹.

def iv_woe(data, target, bins=10, show_woe=False):
    """Compute Weight of Evidence (WoE) and Information Value (IV) per column.

    Every column of ``data`` other than ``target`` is treated as an
    independent variable. Numeric columns (boolean, signed/unsigned integer,
    float or complex dtype) with more than 10 distinct values are first cut
    into quantile bins with ``pd.qcut``; every other column is grouped by its
    raw values.

    Args:
        data: DataFrame holding the independent variables and the target.
        target: Label of the target column. Its values are counted and summed
            per group, so it is presumably a 0/1 event flag
            (NOTE(review): this is not validated here).
        bins: Number of quantile bins requested for binned columns. Fewer
            bins may result because duplicate bin edges are dropped.
        show_woe: When truthy, print the WoE table of each variable.

    Returns:
        A tuple ``(iv_table, woe_table)``. ``iv_table`` has one row per
        variable with columns ``Variable`` and ``IV``. ``woe_table`` stacks
        the per-group tables of all variables (columns ``Variable``,
        ``Cutoff``, ``N``, ``Events``, ``% of Events``, ``Non-Events``,
        ``% of Non-Events``, ``WoE``, ``IV``). Both are empty DataFrames when
        ``data`` has no column besides ``target``.
    """
    # Collect the per-variable frames and concatenate once at the end instead
    # of growing a DataFrame inside the loop (avoids quadratic copying and
    # concatenating onto an empty frame).
    iv_frames = []
    woe_frames = []

    cols = data.columns

    # Run WoE and IV on all the independent variables
    for ivars in cols[~cols.isin([target])]:
        column = data[ivars]
        # 'u' is included so unsigned integer columns are binned like any
        # other numeric column.
        if column.dtype.kind in 'biufc' and len(np.unique(column)) > 10:
            x_values = pd.qcut(column, bins, duplicates='drop')
        else:
            x_values = column
        d0 = pd.DataFrame({'x': x_values, 'y': data[target]})

        # observed=False is spelled out so categorical groupers keep
        # reporting every category regardless of the pandas default.
        d = d0.groupby("x", as_index=False, observed=False).agg({"y": ["count", "sum"]})
        d.columns = ['Cutoff', 'N', 'Events']

        # Counts are floored at 0.5 so a group without events (or without
        # non-events) does not produce log(0) or a division by zero below.
        d['% of Events'] = np.maximum(d['Events'], 0.5) / d['Events'].sum()
        d['Non-Events'] = d['N'] - d['Events']
        d['% of Non-Events'] = np.maximum(d['Non-Events'], 0.5) / d['Non-Events'].sum()
        d['WoE'] = np.log(d['% of Events'] / d['% of Non-Events'])
        d['IV'] = d['WoE'] * (d['% of Events'] - d['% of Non-Events'])
        d.insert(loc=0, column='Variable', value=ivars)

        iv_total = d['IV'].sum()
        # f-string formatting also works for non-string column labels.
        print(f"Information value of {ivars} is {round(iv_total, 6)!s}")

        iv_frames.append(
            pd.DataFrame({"Variable": [ivars], "IV": [iv_total]}, columns=["Variable", "IV"])
        )
        woe_frames.append(d)

        # Show WoE table
        if show_woe:
            print(d)

    if not iv_frames:
        # pd.concat raises on an empty list; return empty frames instead.
        return pd.DataFrame(), pd.DataFrame()
    return pd.concat(iv_frames, axis=0), pd.concat(woe_frames, axis=0)

Parâmetros necessários para chamar a função:

data - é o data frame onde estão localizadas as variáveis dependentes e independentes;
target - o nome da coluna onde estão armazenados os dados da variável dependente;
bins - quantidade de intervalos (faixas) em que as variáveis numéricas com mais de 10 valores distintos são divididas;
show_woe - quando verdadeiro (True), significa que a função irá imprimir a tabela de cálculo do WOE.

Chamando a função para realizar o cálculo.

# Compute the IV summary and the detailed WoE table for every column of df
# except the 'classif' target, using the default number of bins.
iv, woe = iv_woe(df, target='classif')

Information value of idade is 0.201024
Information value of experiencia is 0.474158
Information value of tempo_endereco is 0.279081
Information value of renda is 0.093243
Information value of debito_renda is 0.750072
Information value of cred_deb is 0.261361
Information value of outros_debitos is 0.114104

# Display the IV summary table returned by iv_woe.
print(iv)

         Variable        IV
0           idade  0.201024
0     experiencia  0.474158
0  tempo_endereco  0.279081
0           renda  0.093243
0    debito_renda  0.750072
0        cred_deb  0.261361
0  outros_debitos  0.114104

1. Fonte: https://www.listendata.com/2015/03/weight-of-evidence-woe-and-information.html ↩

	idade	experiencia	tempo_endereco	renda	debito_renda	cred_deb	outros_debitos
0	41	17	12	35.9	11.90	0.504108	3.767992
1	30	13	8	46.7	17.88	1.352694	6.997266
2	40	15	14	61.8	10.64	3.438997	3.136523
3	41	15	14	72.0	29.67	4.165668	17.196732
4	57	7	37	25.6	15.86	1.498199	2.561961