Local Feature Attributions
Local attributions explain individual predictions by decomposing them into feature contributions.
scikit-explain supports:
SHAP — SHapley Additive Explanations
LIME — Local Interpretable Model-agnostic Explanations
Tree Interpreter — tree-based model decomposition
[ ]:
import skexplain
import plotting_config
import shap
[ ]:
# Load the pre-fit models and the training data through scikit-explain's
# loader helpers. `estimators` is indexed further down (estimators[0]);
# X and y are presumably the feature table and the matching targets
# (they are later passed to ExplainToolkit as X= and y=).
estimators = skexplain.load_models()
X, y = skexplain.load_data()
[ ]:
# Select one example to explain. Indexing with a list ([[0]]) rather than a
# scalar keeps the selection two-dimensional (one row) instead of
# collapsing it to a single 1-D record.
single_example = X.iloc[[0]]

# Toolkit bound to the first loaded estimator and that single example only.
explainer = skexplain.ExplainToolkit(
    estimators[0],
    X=single_example,
)

# Label the plots with the display names and units from plotting_config.
explainer.set_plotting_config(
    display_feature_names=plotting_config.display_feature_names,
    display_units=plotting_config.display_units,
)
Computing Local Attributions
[ ]:
# Keyword arguments forwarded to the SHAP computation. The Partition masker
# is built from the full X (capped at 100 samples) and clusters features by
# correlation, which is how masked-out ("missing") features are handled.
shap_kws = {
    'masker': shap.maskers.Partition(
        X,
        max_samples=100,
        clustering='correlation',
    ),
    'algorithm': 'permutation',
}

# Keyword arguments forwarded to LIME, which needs the training data
# (passed here as the raw array behind X).
lime_kws = {
    'training_data': X.values,
    'categorical_names': ['rural', 'urban'],
}

# A single call evaluates SHAP, LIME and tree interpreter on the one
# example the toolkit was constructed with.
contrib_ds = explainer.local_attributions(
    method=['shap', 'lime', 'tree_interpreter'],
    shap_kws=shap_kws,
    lime_kws=lime_kws,
)
[ ]:
# The result is an xarray.Dataset with attribution values for each method.
# Leaving the bare name as the last expression of the cell makes the
# notebook render it.
contrib_ds
Plotting Attributions (Waterfall Plot)
[ ]:
# Plot the attributions computed above for the single example; the call's
# return value is unpacked into a figure and its axes.
fig, axes = explainer.plot_contributions(contrib=contrib_ds)
Performance-Based Attributions
Attributions can also be averaged over the best- and worst-performing examples.
[ ]:
# Rebind `explainer` to a toolkit that sees the whole dataset. Unlike the
# single-example toolkit above, this one is also given the targets y.
explainer = skexplain.ExplainToolkit(
    estimators[0],
    X=X,
    y=y,
)

# Same plot labelling as before: display names and units from plotting_config.
explainer.set_plotting_config(
    display_feature_names=plotting_config.display_feature_names,
    display_units=plotting_config.display_units,
)

# Average tree-interpreter attributions, grouped by model performance
# (performance_based=True) and requested with n_samples=100.
tree_results = explainer.average_attributions(
    method='tree_interpreter',
    performance_based=True,
    n_samples=100,
)
[ ]:
# Performance categories to draw.
perf_keys = [
    'Best Hits',
    'Worst False Alarms',
    'Worst Misses',
]

# Plot the averaged attributions for those categories, limited to the
# top 5 features.
fig, axes = explainer.plot_contributions(
    contrib=tree_results,
    perf_keys=perf_keys,
    num_features=5,
)
Regression Example
[ ]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor

# Fetch the California housing dataset and pull out the feature matrix,
# the regression target and the feature names.
data = fetch_california_housing()
X_reg = data['data']
y_reg = data['target']
feature_names = data['feature_names']

# Seed the forest: without a fixed random_state every run fits a different
# set of trees, so the attributions and plots below would not be
# reproducible from one execution of the notebook to the next.
model = RandomForestRegressor(random_state=42)
model.fit(X_reg, y_reg)
[ ]:
# Explain the first row of the regression data; the list index ([[0]])
# keeps it two-dimensional. This rebinds `single_example`, `explainer` and
# `shap_kws` from the classification example above.
single_example = X_reg[[0]]

# The estimator is passed as a (name, fitted model) pair, and
# feature_names is passed explicitly alongside the array input.
explainer = skexplain.ExplainToolkit(
    ('Random Forest', model),
    X=single_example,
    feature_names=feature_names,
)

# Partition masker built from the full regression data (capped at 100
# samples, features clustered by correlation); 'auto' is passed as the
# SHAP algorithm.
shap_kws = {
    'masker': shap.maskers.Partition(
        X_reg,
        max_samples=100,
        clustering='correlation',
    ),
    'algorithm': 'auto',
}

# SHAP-only attributions for the single example.
results = explainer.local_attributions(
    method='shap',
    shap_kws=shap_kws,
)

# NOTE(review): the return value is bound to one name here, whereas the
# earlier plot_contributions calls unpack `fig, axes` — confirm which form
# matches the installed scikit-explain version.
fig = explainer.plot_contributions(
    results,
    figsize=(4, 8),
)