Module edawesome.sklearn_help
Functions for visual evaluation of sklearn models.
"""
Function for visual evaluation of sklearn models.
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, f1_score, RocCurveDisplay
def plot_roc_curve(fpr, tpr, title=None):
    """Plot an ROC curve with the chance-level diagonal for reference."""
    plt.plot(fpr, tpr, linewidth=2)
    plt.plot([0, 1], [0, 1], 'k--')  # diagonal = random classifier
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.show()
def pretty_classification_report(estimator, X, y, class_names, how='cv'):
    """Confusion-matrix heatmap and ROC curve for a fitted binary classifier."""
    if how not in ('cv', 'test'):
        raise ValueError('how must be either "cv" or "test"')
    y_pred = estimator.predict(X)
    # f-score: 5-fold cross-validated mean, or a single score on the given data
    if how == 'cv':
        f_score = cross_val_score(estimator, X, y, cv=5, scoring='f1').mean()
    else:
        f_score = f1_score(y, y_pred)
    # heatmap of confusion matrix
    cm = confusion_matrix(y, y_pred)
    cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
    sns.heatmap(cm_df, annot=True, cbar=False, cmap='Blues', fmt='d')
    if how == 'cv':
        plt.title(f'Cross-validated f-score: {f_score:.3f}')
    else:
        plt.title(f'Test f-score: {f_score:.3f}')
    plt.ylabel('True Class')
    plt.xlabel('Predicted Class')
    plt.tight_layout()
    plt.show()
    # ROC AUC: cross-validated mean, or computed on the given data
    if how == 'cv':
        roc_auc = cross_val_score(estimator, X, y, cv=5, scoring='roc_auc').mean()
    else:
        roc_auc = roc_auc_score(y, y_pred)
    # ROC curve built from the hard predictions on X
    fpr, tpr, _ = roc_curve(y, y_pred)
    RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name='').plot()
    plt.title(f'ROC curve: {roc_auc:.3f}')
    plt.show()
def plot_predicted_actual(Y_test, name, model=None, X_test=None, Y_pred=None):
    """Scatter predicted vs. measured values with an identity reference line."""
    if (Y_pred is None) and (model is not None) and (X_test is not None):
        Y_pred = model.predict(X_test)
    if Y_pred is None:
        raise ValueError('Y_pred or model with X_test must be provided')
    fig, ax = plt.subplots(figsize=(7, 7))
    ax.scatter(Y_test, Y_pred, alpha=0.3)
    # y = x line: a perfect model would place every point on it
    ax.plot([Y_test.min(), Y_test.max()], [Y_test.min(), Y_test.max()], 'r--', lw=2)
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    ax.set_title(f'{name}: predicted vs measured')
    plt.show()
Functions
def plot_predicted_actual(Y_test, name, model=None, X_test=None, Y_pred=None)
Scatter plot of predicted vs. measured values with an identity reference line. Pass either precomputed predictions via Y_pred, or a fitted model together with X_test.
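A minimal usage sketch; the synthetic regression data and LinearRegression model are illustrative assumptions, not part of the module:

from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from edawesome.sklearn_help import plot_predicted_actual

X, y = make_regression(n_samples=200, noise=10.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model = LinearRegression().fit(X_train, y_train)

# Either pass the fitted model along with X_test ...
plot_predicted_actual(y_test, 'LinearRegression', model=model, X_test=X_test)
# ... or pass precomputed predictions directly
plot_predicted_actual(y_test, 'LinearRegression', Y_pred=model.predict(X_test))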
def plot_roc_curve(fpr, tpr, title=None)
Plot an ROC curve from precomputed false and true positive rates, with the chance-level diagonal for reference.
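A short sketch of the expected inputs; the classifier and synthetic data are illustrative assumptions:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from edawesome.sklearn_help import plot_roc_curve

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Probabilities of the positive class give a smooth curve;
# hard predict() labels would yield only a single corner point.
fpr, tpr, _ = roc_curve(y_test, clf.predict_proba(X_test)[:, 1])
plot_roc_curve(fpr, tpr, title='LogisticRegression ROC')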
def pretty_classification_report(estimator, X, y, class_names, how='cv')
Show a labelled confusion-matrix heatmap and an ROC curve for a fitted binary classifier. With how='cv' the reported f-score and ROC AUC are 5-fold cross-validated means; with how='test' they are computed on the given data.
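A usage sketch assuming a fitted binary classifier; the data and model below are illustrative assumptions:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from edawesome.sklearn_help import pretty_classification_report

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Cross-validated scores (cross_val_score refits clones of clf internally)
pretty_classification_report(clf, X_train, y_train,
                             class_names=['negative', 'positive'], how='cv')
# Single-pass scores on the held-out split
pretty_classification_report(clf, X_test, y_test,
                             class_names=['negative', 'positive'], how='test')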