plot_model
此函数分析训练好的模型在保留集上的性能。在某些情况下,可能需要重新训练模型。
示例
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
lr = create_model('lr')
# plot model
plot_model(lr, plot = 'auc')
更改比例
可以通过参数 scale 更改图形的分辨率比例。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
lr = create_model('lr')
# plot model
plot_model(lr, plot = 'auc', scale = 3)
保存图表
您可以使用参数 save 将图表保存为 png 文件。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
lr = create_model('lr')
# plot model
plot_model(lr, plot = 'auc', save = True)
自定义图表
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
lr = create_model('lr')
# plot model
plot_model(lr, plot = 'confusion_matrix', plot_kwargs = {'percent' : True})
使用训练数据
如果您想查看模型在训练数据上的评估图表,可以在 plot_model 函数中传入 use_train_data=True。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
lr = create_model('lr')
# plot model
plot_model(lr, plot = 'auc', use_train_data = True)
训练数据与保留数据上的图表
按模块划分的示例
分类
回归
聚类
异常检测
evaluate_model
# load dataset
from pycaret.datasets import get_data
juice = get_data('juice')
# init setup
from pycaret.classification import *
exp_name = setup(data = juice, target = 'Purchase')
# create model
lr = create_model('lr')
# launch evaluate widget
evaluate_model(lr)
interpret_model
示例
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost)
保存图表
您可以使用参数 save 将图表保存为 png 文件。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, save = True)
更改图表类型
有几种不同的图表类型可用,可以通过参数 plot 更改。
相关性
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'correlation')
默认情况下,PyCaret 使用数据集中的第一个特征,但这可以通过参数 feature 更改。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'correlation', feature = 'Age (years)')
部分依赖图
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'pdp')
默认情况下,PyCaret 使用数据集中第一个可用的特征,但这可以通过参数 feature 更改。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'pdp', feature = 'Age (years)')
Morris 敏感性分析
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'msa')
置换特征重要性
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'pfi')
原因图
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'reason')
当您生成 reason 图表而不传递测试数据的特定索引时,将显示交互式图表,您可以在其中选择 x 轴和 y 轴。这仅在使用 Jupyter Notebook 或类似环境时才可行。如果您想查看特定观测的图表,则必须在 observation 参数中传递其索引。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, plot = 'reason', observation = 1)
这里的 observation = 1 表示测试集中的索引 1。
使用训练数据
默认情况下,所有图表都在测试数据集上生成。如果您想使用训练数据集生成图表(不推荐),可以使用 use_train_data 参数。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# creating a model
xgboost = create_model('xgboost')
# interpret model
interpret_model(xgboost, use_train_data = True)
dashboard
仪表盘示例
# load dataset
from pycaret.datasets import get_data
juice = get_data('juice')
# init setup
from pycaret.classification import *
exp_name = setup(data = juice, target = 'Purchase')
# train model
lr = create_model('lr')
# launch dashboard
dashboard(lr)
视频
check_fairness
有许多方法可以概念化公平性。check_fairness 函数遵循一种被称为群体公平性的方法,它询问:哪些群体有遭受损害的风险?check_fairness 提供了不同群体(也称为子群体)之间的公平性相关指标。
检查公平性示例
# load dataset
from pycaret.datasets import get_data
income = get_data('income')
# init setup
from pycaret.classification import *
exp_name = setup(data = income, target = 'income >50K')
# train model
lr = create_model('lr')
# check model fairness
lr_fairness = check_fairness(lr, sensitive_features = ['sex', 'race'])
视频
get_leaderboard
此函数返回当前设置中训练的所有模型的排行榜。
# load dataset
from pycaret.datasets import get_data
diabetes = get_data('diabetes')
# init setup
from pycaret.classification import *
clf1 = setup(data = diabetes, target = 'Class variable')
# compare models
top3 = compare_models(n_select = 3)
# tune top 3 models
tuned_top3 = [tune_model(i) for i in top3]
# ensemble top 3 tuned models
ensembled_top3 = [ensemble_model(i) for i in tuned_top3]
# blender
blender = blend_models(tuned_top3)
# stacker
stacker = stack_models(tuned_top3)
# check leaderboard
get_leaderboard()
您也可以通过它访问训练好的流水线。
# check leaderboard
lb = get_leaderboard()
# select top model
lb.iloc[0]['Model']
assign_model
聚类
# load dataset
from pycaret.datasets import get_data
jewellery = get_data('jewellery')
# init setup
from pycaret.clustering import *
clu1 = setup(data = jewellery)
# train a model
kmeans = create_model('kmeans')
# assign model
assign_model(kmeans)
异常检测