The modelStudio() function uses DALEX explainers created with DALEX::explain() or DALEXtra::explain_*().
# packages for explainer objects
install.packages("DALEX")
install.packages("DALEXtra")
# update main dependencies
install.packages("ingredients")
install.packages("iBreakDown")
In this example we will fit a ranger model on the titanic data.
# load packages and data
library(mlr)
library(DALEXtra)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# mlr ClassifTask takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
task <- makeClassifTask(id = "titanic", data = train, target = "survived")
learner <- makeLearner("classif.ranger", predict.type = "prob")
model <- train(learner, task)
# create an explainer for the model
explainer <- explain_mlr(model,
                         data = test,
                         y = test$survived,
                         label = "mlr")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer,
            new_observation)
In this example we will fit a ranger model on the titanic data.
# load packages and data
library(mlr3)
library(mlr3learners)
library(DALEXtra)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# mlr3 TaskClassif takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
task <- TaskClassif$new(id = "titanic", backend = train, target = "survived")
learner <- lrn("classif.ranger", predict_type = "prob")
learner$train(task)
# create an explainer for the model
explainer <- explain_mlr3(learner,
                          data = test,
                          y = test$survived,
                          label = "mlr3")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer,
            new_observation)
In this example we will fit an xgboost model on the titanic data.
# load packages and data
library(xgboost)
library(DALEX)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
train_matrix <- model.matrix(survived ~ . - 1, train)
test_matrix <- model.matrix(survived ~ . - 1, test)
# fit a model
xgb_matrix <- xgb.DMatrix(train_matrix, label = train$survived)
params <- list(max_depth = 7, objective = "binary:logistic", eval_metric = "auc")
model <- xgb.train(params, xgb_matrix, nrounds = 500)
# create an explainer for the model
explainer <- explain(model,
                     data = test_matrix,
                     y = test$survived,
                     label = "xgboost")
# pick observations
new_observation <- test_matrix[1:2, , drop=FALSE]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer,
            new_observation,
            options = modelStudioOptions(margin_left = 140))
In this example we will fit a gbm model on the titanic data.
# load packages and data
library(caret)
library(DALEX)
library(modelStudio)
data <- DALEX::titanic_imputed
# split the data
index <- sample(1:nrow(data), 0.7*nrow(data))
train <- data[index,]
test <- data[-index,]
# caret train takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
cv <- trainControl(method = "repeatedcv", number = 3, repeats = 10)
model <- train(survived ~ ., data = train, method = "gbm", trControl = cv)
# create an explainer for the model
explainer <- explain(model,
                     data = test,
                     y = test$survived,
                     label = "caret")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer,
            new_observation)
In this example we will fit an h2o.automl model on the titanic data.
# load packages and data
library(h2o)
library(DALEXtra)
library(modelStudio)
data <- DALEX::titanic_imputed
# init h2o
h2o.init()
# split the data
h2o_split <- h2o.splitFrame(as.h2o(data))
train <- h2o_split[[1]]
test <- as.data.frame(h2o_split[[2]])
# h2o automl takes target as factor
train$survived <- as.factor(train$survived)
# fit a model
automl <- h2o.automl(y = "survived", training_frame = train, max_runtime_secs = 30)
model <- automl@leader
# stop h2o progress printing
h2o.no_progress()
# create an explainer for the model
explainer <- explain_h2o(model,
                         data = test,
                         y = test$survived,
                         label = "h2o")
# pick observations
new_observation <- test[1:2,]
rownames(new_observation) <- c("id1", "id2")
# make a studio for the model
modelStudio(explainer,
            new_observation,
            B = 5)
# shutdown h2o
h2o.shutdown(prompt = FALSE)
The modelStudio() function uses dalex explainers created with dalex.Explainer().
Use the pickle Python module and the reticulate R package to easily make a studio for a model.
# package for pickle load
install.packages("reticulate")
# update main dependencies
install.packages("ingredients")
install.packages("iBreakDown")
In this example we will fit a Pipeline MLPClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.neural_network import MLPClassifier
data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)
classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=500)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)
# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='scikit-learn')
#! remove residual_function before dump !
explainer.residual_function = None
# pack the explainer into a pickle file
import pickle
pickle_out = open('explainer_scikitlearn.pickle', 'wb')
pickle.dump(explainer, pickle_out)
pickle_out.close()
Then, use modelStudio in R:
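# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_scikitlearn.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)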
In this example we will fit a Pipeline LGBMClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from lightgbm import LGBMClassifier
data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)
classifier = LGBMClassifier(n_estimators=300)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)
# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='lightGBM')
#! remove residual_function before dump !
explainer.residual_function = None
# pack the explainer into a pickle file
import pickle
pickle_out = open('explainer_lightgbm.pickle', 'wb')
pickle.dump(explainer, pickle_out)
pickle_out.close()
Then, use modelStudio in R:
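# load the explainer from the pickle file
library(reticulate)
explainer <- py_load_object("explainer_lightgbm.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)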
In this example we will fit a Pipeline KerasClassifier model on the titanic data.
First, use dalex in Python:
# load packages and data
import dalex as dx
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from keras.models import Sequential
data = dx.datasets.load_titanic()
X = data.drop(columns='survived')
y = data.survived
# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y)
# fit a pipeline model
numerical_features = ['age', 'fare', 'sibsp', 'parch']
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ]
)
categorical_features = ['gender', 'class', 'embarked']
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ]
)
def create_architecture():
    model = Sequential()
    # there are 17 inputs after the pipeline
    model.add(Dense(60, input_dim=17, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
classifier = KerasClassifier(build_fn=create_architecture, epochs=100, batch_size=5)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', classifier)
    ]
)
model.fit(X_train, y_train)
# create an explainer for the model
explainer = dx.Explainer(model, data=X_test, y=y_test, label='keras')
#! remove residual_function before dump !
explainer.residual_function = None
# pack the explainer into a pickle file
import pickle
pickle_out = open('explainer_keras.pickle', 'wb')
pickle.dump(explainer, pickle_out)
pickle_out.close()
Then, use modelStudio in R:
# load the explainer from the pickle file
library(reticulate)
#! add blank create_architecture function before load !
py_run_string("
def create_architecture():
return True
")
explainer <- py_load_object("explainer_keras.pickle", pickle = "pickle")
# make a studio for the model
library(modelStudio)
modelStudio(explainer)