Source: Deephub Imba. This article is about 15,000 words; suggested reading time is 15 minutes. It explains, in a visual way, the theoretical foundations of decision trees for classification and regression problems.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from matplotlib.colors import ListedColormap

np.random.seed(7)
low_r = 10
high_r = 15
n = 1550
X = np.random.uniform(low=[0, 0], high=[4, 4], size=(n, 2))
# Remove the points in the ring between the two radii to create a gap
# between the classes.
drop = (X[:, 0]**2 + X[:, 1]**2 > low_r) & (X[:, 0]**2 + X[:, 1]**2 < high_r)
X = X[~drop]
y = (X[:, 0]**2 + X[:, 1]**2 >= high_r).astype(int)

colors = ['red', 'blue']
plt.figure(figsize=(6, 6))
for i in np.unique(y):
    plt.scatter(X[y==i, 0], X[y==i, 1], label="y="+str(i),
                color=colors[i], edgecolor="white", s=50)
circle = plt.Circle((0, 0), 3.5, color='black', fill=False,
                    linestyle="--", label="Actual boundary")
plt.xlim([-0.1, 4.2])
plt.ylim([-0.1, 5])
ax = plt.gca()
ax.set_aspect('equal')
ax.add_patch(circle)
plt.xlabel('$x_1$', fontsize=16)
plt.ylabel('$x_2$', fontsize=16)
plt.legend(loc='best', fontsize=11)
plt.show()

tree_clf = DecisionTreeClassifier(random_state=0)
tree_clf.fit(X, y)

plt.figure(figsize=(17, 12))
tree.plot_tree(tree_clf, fontsize=17, feature_names=["x1", "x2"])
plt.show()

def plot_boundary_lines(tree_model):
    # Recursively draw the split of each internal node as a line segment:
    # splits on feature 0 are vertical lines, splits on feature 1 horizontal.
    def helper(node, x1_min, x1_max, x2_min, x2_max):
        if feature[node] == 0:
            plt.plot([threshold[node], threshold[node]],
                     [x2_min, x2_max], color="black")
        if feature[node] == 1:
            plt.plot([x1_min, x1_max],
                     [threshold[node], threshold[node]], color="black")
        if children_left[node] != children_right[node]:  # not a leaf
            if feature[node] == 0:
                helper(children_left[node], x1_min, threshold[node],
                       x2_min, x2_max)
                helper(children_right[node], threshold[node], x1_max,
                       x2_min, x2_max)
            else:
                helper(children_left[node], x1_min, x1_max,
                       x2_min, threshold[node])
                helper(children_right[node], x1_min, x1_max,
                       threshold[node], x2_max)

    feature = tree_model.tree_.feature
    threshold = tree_model.tree_.threshold
    children_left = tree_model.tree_.children_left
    children_right = tree_model.tree_.children_right
    x1_min = x2_min = -1
    x1_max = x2_max = 5
    helper(0, x1_min, x1_max, x2_min, x2_max)
def plot_boundary(X, y, clf, lims):
    gx1, gx2 = np.meshgrid(np.arange(lims[0], lims[1],
                                     (lims[1]-lims[0])/300.0),
                           np.arange(lims[2], lims[3],
                                     (lims[3]-lims[2])/300.0))
    cmap_light = ListedColormap(['lightsalmon', 'aqua'])
    gx1l = gx1.flatten()
    gx2l = gx2.flatten()
    gx = np.vstack((gx1l, gx2l)).T
    gyhat = clf.predict(gx)
    gyhat = gyhat.reshape(gx1.shape)
    plt.pcolormesh(gx1, gx2, gyhat, cmap=cmap_light)
    plt.scatter(X[y==0, 0], X[y==0, 1], label="y=0", alpha=0.7,
                color="red", edgecolor="white", s=50)
    plt.scatter(X[y==1, 0], X[y==1, 1], label="y=1", alpha=0.7,
                color="blue", edgecolor="white", s=50)
    plt.legend(loc='upper left')
plt.figure(figsize=(6, 6))
plot_boundary(X, y, tree_clf, lims=[-1, 5, -1, 5])
plot_boundary_lines(tree_clf)
ax = plt.gca()
ax.set_aspect('equal')
plt.xlim([-1, 5])
plt.ylim([-1, 5])
plt.xlabel('$x_1$', fontsize=16)
plt.ylabel('$x_2$', fontsize=16)
plt.show()
plt.figure(figsize=(6, 6))
plot_boundary(X, y, tree_clf, lims=[-1, 5, -1, 5])
circle = plt.Circle((0, 0), 3.5, color='black', fill=False,
                    linestyle="--", label="Actual boundary")
plt.text(3.5, 4.5, r"$\hat{y}=1$", fontsize=13)
plt.text(2.35, 2.1, r"$\hat{y}=0$", fontsize=13)
ax = plt.gca()
ax.set_aspect('equal')
ax.add_patch(circle)
plt.xlabel('$x_1$', fontsize=16)
plt.ylabel('$x_2$', fontsize=16)
plt.xlim([-0.1, 4.2])
plt.ylim([-0.1, 5])
plt.legend(loc='upper left')
plt.show()
np.random.seed(1)
n = 550
X1 = np.random.uniform(low=[0, 0], high=[4, 4], size=(n, 2))
# Remove the band between x1=1.8 and x1=1.9 to create a gap between classes.
drop = (X1[:, 0] > 1.8) & (X1[:, 0] < 1.9)
X1 = X1[~drop]
y1 = (X1[:, 0] > 1.9).astype(int)
# Add 15 noisy points of class 1 on the wrong side of the boundary.
X2 = np.random.uniform(low=[1.7, 0], high=[1.9, 4], size=(15, 2))
y2 = np.ones(15).astype(int)
X = np.concatenate((X1, X2), axis=0)
y = np.concatenate((y1, y2))

colors = ['red', 'blue']
for i in np.unique(y):
    plt.scatter(X[y==i, 0], X[y==i, 1], label="y="+str(i),
                color=colors[i], edgecolor="white", s=50)
plt.axvline(x=1.8, color="black", linestyle="--")
plt.legend(loc='best')
plt.xlim([-0.5, 4.5])
plt.ylim([-0.2, 5])
ax = plt.gca()
ax.set_aspect('equal')
plt.xlabel('$x_1$', fontsize=16)
plt.ylabel('$x_2$', fontsize=16)
plt.show()
# Listing 8
tree_clf = DecisionTreeClassifier(random_state=1)
tree_clf.fit(X, y)

plt.figure(figsize=(13, 10))
tree.plot_tree(tree_clf, fontsize=9, feature_names=["x1", "x2"])
plt.show()

# Listing 9
plt.figure(figsize=(6, 6))
plot_boundary(X, y, tree_clf, lims=[-1, 5, -1, 5])
plt.axvline(x=1.8, color="black", linestyle="--", label="Actual boundary")
plt.text(0, -0.3, r"$\hat{y}=0$", fontsize=13)
plt.text(3, -0.3, r"$\hat{y}=1$", fontsize=13)
ax = plt.gca()
ax.set_aspect('equal')
plt.xlim([-0.5, 4.5])
plt.ylim([-0.5, 4.5])
plt.xlabel('$x_1$', fontsize=16)
plt.ylabel('$x_2$', fontsize=16)
plt.legend(loc="best")
plt.show()
tree_clf1 = DecisionTreeClassifier(random_state=1, max_depth=1)
tree_clf1.fit(X, y)

plt.figure(figsize=(10, 5))
tree.plot_tree(tree_clf1, fontsize=9, feature_names=["x1", "x2"])
plt.show()


# Listing 11
plt.figure(figsize=(6, 6))
plot_boundary(X, y, tree_clf1, lims=[-1, 5, -1, 5])
plt.axvline(x=1.8, color="black", linestyle="--", label="Actual boundary")
plt.text(0, -0.3, r"$\hat{y}=0$", fontsize=13)
plt.text(3, -0.3, r"$\hat{y}=1$", fontsize=13)
ax = plt.gca()
ax.set_aspect('equal')
plt.xlim([-0.5, 4.5])
plt.ylim([-0.5, 4.5])
plt.xlabel('$x_1$', fontsize=16)
plt.ylabel('$x_2$', fontsize=16)
plt.legend(loc="best")
plt.show()
np.random.seed(4)
x = np.linspace(0, 3, 60)
x1 = np.linspace(0, 1, 20)
x2 = np.linspace(1, 3, 40)
# Underlying trend: y = x for x < 1, then flat at y = 1, plus Gaussian noise.
y = x.copy()
y[x >= 1] = 1
y = y + np.random.normal(scale=0.1, size=60)
X = x.reshape(-1, 1)

plt.figure(figsize=(8, 8))
plt.scatter(x, y, label="Noisy data points")
plt.plot(x1, x1, color="blue", alpha=0.5, label="Trend")
plt.plot(x2, len(x2)*[1], color="blue", alpha=0.5)
plt.xlim([-0.1, 3.1])
plt.ylim([-0.1, 2])
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16)
ax = plt.gca()
ax.set_aspect('equal')
plt.legend(loc="best", fontsize=14)
plt.show()

tree_regres = DecisionTreeRegressor(random_state=0, max_depth=3)
tree_regres.fit(X, y)

plt.figure(figsize=(17, 8))
tree.plot_tree(tree_regres, fontsize=10, feature_names=["x"])
plt.show()

y.mean() #0.828
((y.mean()-y)**2).mean() #0.102
y[(X <=0.585).flatten()].mean() #0.262
((0.262 - y[(X <= 0.585).flatten()])**2).mean() #0.037
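As a quick sanity check (a sketch reusing the X and y arrays defined above, not one of the article's listings), we can confirm that this split pays off: the weighted average of the two children's MSEs comes out below the root's MSE of about 0.102:

left = (X <= 0.585).flatten()
mse = lambda v: ((v - v.mean())**2).mean()
# Weight each child's MSE by the fraction of samples it receives.
weighted_mse = left.mean() * mse(y[left]) + (~left).mean() * mse(y[~left])
weighted_mse  # smaller than 0.102, which is why this split was chosen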
x1 = np.linspace(0, 1, 20)
x2 = np.linspace(1, 3, 40)
X_space = np.linspace(-0.3, 3.3, 1000).reshape(-1, 1)
yhat = tree_regres.predict(X_space)

plt.figure(figsize=(8, 6))
plt.scatter(x, y, label="Training data")
plt.plot(X_space, yhat, color="red", label="prediction")
plt.plot(x1, x1, color="blue", alpha=0.5, label="Trend")
plt.plot(x2, len(x2)*[1], color="blue", alpha=0.5)
plt.legend(loc="best", fontsize=14)
plt.xlim([-0.3, 3.3])
plt.ylim([-0.1, 2])
ax = plt.gca()
ax.set_aspect('equal')
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$y$', fontsize=14)
plt.show()
X_space = np.linspace(-0.3, 3.3, 1000).reshape(-1, 1)
tree_regres = DecisionTreeRegressor(random_state=1)
tree_regres.fit(X, y)
yhat = tree_regres.predict(X_space)

plt.figure(figsize=(8, 6))
plt.scatter(X, y, label="Training data")
plt.plot(X_space, yhat, color="red", label="prediction")
plt.xlim([-0.3, 3.3])
plt.ylim([-0.1, 2])
plt.legend(loc="best", fontsize=14)
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$y$', fontsize=14)
ax = plt.gca()
ax.set_aspect('equal')
plt.show()
tree_regres.tree_.max_depth #11
tree_regres.get_n_leaves() #60

Sixty leaves for sixty training points: the fully grown tree carries one leaf per sample, which means it has effectively memorized the training data.
tree_regres.score(X,y) #1.0
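The perfect training score is a symptom of overfitting rather than of a good model. One way to make this visible (a sketch that is not in the original article: it draws a fresh noisy sample from the same underlying trend, with a hypothetical seed) is to score the fully grown tree on data it has never seen:

np.random.seed(5)  # hypothetical seed for the held-out sample
x_test = np.linspace(0, 3, 60)
y_test = np.minimum(x_test, 1) + np.random.normal(scale=0.1, size=60)
tree_regres.score(x_test.reshape(-1, 1), y_test)  # noticeably below 1.0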

The following class implements a gradient boosting regressor: starting from the mean of the targets as the initial prediction, it repeatedly fits a shallow decision tree to the residuals of the current model and adds that tree's predictions, scaled by the learning rate, to the model.

class GradBoostingRegressor():
    def __init__(self, num_estimators, learning_rate, max_depth=1):
        self.num_estimators = num_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.tree_list = []

    def fit(self, X, y):
        # The initial prediction F0 is the mean of the targets.
        self.F0 = y.mean()
        Fm = self.F0
        for i in range(self.num_estimators):
            # Fit each new tree to the residuals of the current model.
            tree_reg = DecisionTreeRegressor(max_depth=self.max_depth,
                                             random_state=0)
            tree_reg.fit(X, y - Fm)
            Fm += self.learning_rate * tree_reg.predict(X)
            self.tree_list.append(tree_reg)

    def predict(self, X):
        y_hat = self.F0 + self.learning_rate * \
            np.sum([t.predict(X) for t in self.tree_list], axis=0)
        return y_hat
M = 9
X_space = np.linspace(-0.3, 3.3, 1000).reshape(-1, 1)
gbm_reg = GradBoostingRegressor(num_estimators=M+1, learning_rate=0.3)
gbm_reg.fit(X, y)

fig, axs = plt.subplots(M+1, 2, figsize=(11, 45))
plt.subplots_adjust(hspace=0.3)
axs[0, 0].axis('off')
axs[0, 1].scatter(X, y, label="y")
axs[0, 1].axhline(y=gbm_reg.F0, color="red", label="$F_0(x)$")
axs[0, 1].set_title("m=0", fontsize=14)
axs[0, 1].set_xlim([-0.3, 3.3])
axs[0, 1].set_ylim([-0.5, 2])
axs[0, 1].legend(loc="best", fontsize=12)
axs[0, 1].set_aspect('equal')
axs[0, 1].set_xlabel("x", fontsize=13)
axs[0, 1].set_ylabel("y", fontsize=13)

for i in range(1, M+1):
    # Left column: residuals of the previous model and the tree fitted to them.
    Fi_minus_1 = gbm_reg.F0 + gbm_reg.learning_rate * \
        np.sum([t.predict(X) for t in gbm_reg.tree_list[:i-1]], axis=0)
    axs[i, 0].scatter(X, y - Fi_minus_1, label=f"$y-F_{{{i-1}}}(x)$")
    axs[i, 0].plot(X_space, gbm_reg.tree_list[i-1].predict(X_space),
                   color="red", label=f"$h_{{{i}}}(x)$")
    axs[i, 0].set_title("m={}".format(i), fontsize=14)
    axs[i, 0].set_xlim([-0.3, 3.3])
    axs[i, 0].set_ylim([-1, 2])
    axs[i, 0].set_xlabel("x", fontsize=13)
    axs[i, 0].set_ylabel("residual", fontsize=13)
    axs[i, 0].legend(loc="best", fontsize=12)
    axs[i, 0].set_aspect('equal')

    # Right column: the data together with the running prediction F_i(x).
    axs[i, 1].scatter(X, y, label="y")
    Fi = gbm_reg.F0 + gbm_reg.learning_rate * \
        np.sum([t.predict(X_space) for t in gbm_reg.tree_list[:i]], axis=0)
    axs[i, 1].plot(X_space, Fi, color="red", label=f"$F_{{{i}}}(x)$")
    axs[i, 1].set_title("m={}".format(i), fontsize=14)
    axs[i, 1].set_xlim([-0.3, 3.3])
    axs[i, 1].set_ylim([-0.5, 2])
    axs[i, 1].set_xlabel("x", fontsize=13)
    axs[i, 1].set_ylabel("y", fontsize=13)
    axs[i, 1].legend(loc="best", fontsize=13)
    axs[i, 1].set_aspect('equal')
plt.show()

[Figure: one row per boosting step m = 0…9; the left column shows the residuals y − F_{m−1}(x) with the tree h_m(x) fitted to them, the right column shows the data with the running prediction F_m(x).]
The overall prediction of the model, $F_i(x)$, improves at every step. In this example we used only 9 decision trees, but what happens if we use more?

X_space = np.linspace(-0.3, 3.3, 1000).reshape(-1, 1)
gbm_reg = GradBoostingRegressor(num_estimators=50, learning_rate=0.3)
gbm_reg.fit(X, y)
y_hat = gbm_reg.predict(X_space)

plt.figure(figsize=(8, 6))
plt.scatter(x, y, label="Training data")
plt.plot(X_space, y_hat, color="red", label="prediction")
plt.xlim([-0.3, 3.3])
plt.ylim([-0.1, 2])
plt.legend(loc="best", fontsize=14)
plt.xlabel('$x$', fontsize=14)
plt.ylabel('$y$', fontsize=14)
ax = plt.gca()
ax.set_aspect('equal')
plt.show()

[Figure: prediction of the 50-estimator gradient boosting regressor over the training data.]
data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZwMzySa1Cx010aogfgxY7jYRiaic88KSaKVoTebXZ1ibE8gWUhqsBVwujQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="720" style="outline: 0px;visibility: visible !important;width: 677px !important;" /></p><p><br /></p><section style="line-height: 1.5em;"><span style="font-size: 15px;">在 scikit-learn 库中,GradientBoostingRegressor 类也可以用于梯度提升回归。这里我们使用这个类来测试我们的实现:</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><pre spellcheck="false" cid="n266" mdtype="fences" data-style="overflow: visible; font-size: 0.85rem; break-inside: avoid; text-align: left; white-space: normal; background-image: inherit; background-position: inherit; background-size: inherit; background-repeat: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; width: inherit; border-width: 1px; border-style: solid; border-color: rgb(244, 244, 244); -webkit-font-smoothing: initial; line-height: 1.43rem; border-radius: 2px; overflow-wrap: normal; color: rgb(82, 97, 106); letter-spacing: normal; font-family: "PingFang SC", "Lantinghei SC", "Microsoft Yahei", "Hiragino Sans GB", "Microsoft Sans Serif", "WenQuanYi Micro Hei", sans !important; background-color: rgb(240, 245, 249); padding: 0.3rem 0rem !important; margin-top: 0.8rem !important; margin-bottom: 0.8rem !important;" class="js_darkmode__36"><section class="code-snippet__fix code-snippet__js"><ul class="code-snippet__line-index code-snippet__js"><li></li><li></li><li></li><li></li><li></li><li></li><li></li></ul><pre class="code-snippet__js" data-lang="properties"><code><span class="code-snippet_outer"> from sklearn.ensemble import GradientBoostingRegressor</span></code><code><span class="code-snippet_outer"> gbm_reg_sklrean = GradientBoostingRegressor(n_estimators=50,</span></code><code><span class="code-snippet_outer"> learning_rate=0.3,</span></code><code><span class="code-snippet_outer"> max_depth=1)</span></code><code><span class="code-snippet_outer"> gbm_reg_sklrean.fit(X, y)</span></code><code><span class="code-snippet_outer"> y_hat_sklrean = gbm_reg_sklrean.predict(X_space)</span></code><code><span class="code-snippet_outer"> np.allclose(y_hat, y_hat_sklrean)</span></code></pre></section></pre><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">让我们将其与上面的决策树回归器的预测进行比较。那棵树的深度是11,当在同一数据集上训练时出现了过拟合问题。那么为什么包含49棵树的梯度提升回归器没有遭受过拟合?要回答这个问题,我们需要更深入地了解这两种模型以及它们如何处理训练数据集。下图展示了决策树回归器中的不同节点是如何处理训练数据集的。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZIOiaSibtZ3ibP8n0lE51Tm23qSXPkyaiaHbB6ZMLnicH9iaeds7cvEQjQEYA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0.55556" data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZIOiaSibtZ3ibP8n0lE51Tm23qSXPkyaiaHbB6ZMLnicH9iaeds7cvEQjQEYA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="720" style="outline: 0px;visibility: visible !important;width: 677px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 
15px;">根节点作为最顶端的节点,能够看到整个数据集,并能够正确地检测到数据集的上升趋势。因为它能够看到整体情况,所以可以区分数据集的趋势与噪声。相反,较深的节点只能看到从其父节点传递给它的原始数据集的一小部分。这小部分的变化主要是由于噪声引起的,由于这是节点唯一能看到的东西,它将噪声视为趋势,并预测出一个下降趋势。随着树的加深,节点更容易受到噪声的影响,并试图学习这些噪声,结果就是过拟合。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">下图则展示了梯度提升回归器中的不同模型(或节点)如何看待训练数据集。每个模型看到的是整个数据集的残差。因此它能够更可靠地检测出主要趋势,因此它对过拟合更具鲁棒性。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZeIK83B5ibmEPW8dLZOciaBHiaqTS9rbYj35VfBjfibtCA8orrl5bbWISYQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0.22223" data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZeIK83B5ibmEPW8dLZOciaBHiaqTS9rbYj35VfBjfibtCA8orrl5bbWISYQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="720" style="outline: 0px;visibility: visible !important;width: 677px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">当模型的每个部分只能访问数据的局部时,它们可能会过度拟合局部的噪声,而忽略全局的真实模式。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><p><span style="font-size: 15px;"><br /></span></p><section style="line-height: 1.5em;"><span style="font-size: 15px;">在本节中,我们解释梯度提升分类算法,但我们只关注二元分类问题。在二元分类问题中,目标只能取两个标签,分别用0和1表示。设得到1的概率为 ( p )。事件发生的赔率是事件发生的概率与事件不发生的概率之比。因此,得到1的赔率为:</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZAITBM8e2YF80OvlibfIkC1vNULRJ2pLOicvdvMHQRPjibxPKnics9K3Usg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." data-s="300,640" data-type="webp" data-w="247" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZAITBM8e2YF80OvlibfIkC1vNULRJ2pLOicvdvMHQRPjibxPKnics9K3Usg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 247px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">概率的对数(也就是logit函数)被定义为:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZxE1nLic3jgDTmTQtLR9kPplTfltkU2PjhSKX9pdTbUFia5YHZbcdxKWg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." 
data-s="300,640" data-type="webp" data-w="444" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZxE1nLic3jgDTmTQtLR9kPplTfltkU2PjhSKX9pdTbUFia5YHZbcdxKWg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 444px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">但为什么我们需要计算赔率的对数呢?如果我们将 ( p ) 设为0,赔率的对数为 ( -∞),如果将 ( p ) 设为1,赔率的对数为 ( +∞ )。概率 ( p ) 始终在0和1之间,但赔率的对数将其转换为 ( -∞ ) 到 ( +∞) 之间的实数。如果我们有了赔率的对数(log(odds)),我们可以使用以下公式来计算对应的概率:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZM13GdOKy9G7aHuHo65vylsB1OaZXlf6rupLLn1D2Tpx7p5b8rd0KUA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0.022222" data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZM13GdOKy9G7aHuHo65vylsB1OaZXlf6rupLLn1D2Tpx7p5b8rd0KUA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="720" style="outline: 0px;visibility: visible !important;width: 677px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">这个公式表示为逻辑回归中的sigmoid函数,用于将线性回归模型的输出转换为概率值。梯度提升分类器利用这一转换来预测类别概率,并通过最大化似然函数来优化模型。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">梯度增强分类算法如下:</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZIMJD6zvmHahhEFsmOu6QbFdKUsrjEJEIPpCl3R5PuqJLEcp5C4tPNg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="1.2625" data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZIMJD6zvmHahhEFsmOu6QbFdKUsrjEJEIPpCl3R5PuqJLEcp5C4tPNg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="720" style="outline: 0px;visibility: visible !important;width: 677px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">梯度提升从对训练数据集的预测做出初步猜测开始。在梯度提升回归器中,初始猜测是训练数据集中所有数据点的目标平均值。对于分类问题,这里的初始猜测是训练数据集中目标值为1的概率。这个概率由下式给出:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZA3bJLFC8zHibI0VicAqQKHfDwJyhBCib5zQpTYT9VpAqLSmGqia3GOk96Q/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." 
data-s="300,640" data-type="webp" data-w="265" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZA3bJLFC8zHibI0VicAqQKHfDwJyhBCib5zQpTYT9VpAqLSmGqia3GOk96Q/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 265px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">其中,N是训练数据集中的示例数量。函数 ( F_0(x) ) 是一个简单的基模型,它预测的是这个概率的赔率的对数:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZ8aofpgBKJIZlnbVkmgTdG8elnicUDVfv7r10zmib2OnEmx2WuzTksyNA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZ8aofpgBKJIZlnbVkmgTdG8elnicUDVfv7r10zmib2OnEmx2WuzTksyNA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="413" style="outline: 0px;visibility: visible !important;width: 413px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">对于训练数据集中的示例,基模型的残差计算如下:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZjrp2eDkduaVhVCSrOPufbds2ibaI0ep0laZDjfoz2Z3YjtIt4vpk70Q/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." data-s="300,640" data-type="webp" data-w="159" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZjrp2eDkduaVhVCSrOPufbds2ibaI0ep0laZDjfoz2Z3YjtIt4vpk70Q/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 159px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">这里的残差是使用概率而不是赔率的对数计算的。接下来创建一个浅层决策树回归器来预测训练数据集的残差。这个回归器记为 ( h_1 ),以 x 为特征,以 ( y - p(x) ) 为目标。在训练完树回归器后,我们需要修改其叶节点的值。对于树中的每个叶节点(记为 ( l )),我们将叶节点的值 ( v_l ) 修改为:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZAAj4DhxhsNaR6o97IMHg0NEZDoXdb78sJM9FicJFiapxsR6iblbJOTmkA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." 
data-s="300,640" data-type="webp" data-w="439" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZAAj4DhxhsNaR6o97IMHg0NEZDoXdb78sJM9FicJFiapxsR6iblbJOTmkA/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 439px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">其中,L 是落在该叶子节点中的所有训练集示例的集合。我们用 ( (x_i, y_i) ) 表示L中的每个示例,( p_i ) 是预测特征 ( x_i ) 的目标等于1的概率。因此,分子是L中所有示例残差的和,分母是L中所有示例的 ( p(1-p) ) 的和。使用修改后的树,我们现在可以预测训练数据集中所有示例的目标。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">修改后的树回归器的预测记为 ( h_1(x) )。我们将这棵树的预测加到基模型的预测上:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZabic4ibQKhteia6oNfoiaibPQPQTM4zibsg88WDSvr5yiappt22qudry0DFBg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZabic4ibQKhteia6oNfoiaibPQPQTM4zibsg88WDSvr5yiappt22qudry0DFBg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="430" style="outline: 0px;visibility: visible !important;width: 430px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">这里的 ( F_1(x) ) 相比初始预测 ( F_0(x) ) 是一个更好的目标预测,预测的是特征 ( x ) 的目标的赔率的对数。现在,我们使用以下公式计算训练数据集中所有示例目标等于1的预测概率:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZLxq2j8d8ia9kyA5XUDb8THcIT6HDGkRs3icCPtwmxwzDBYHSyroTJz8g/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." data-s="300,640" data-type="webp" data-w="315" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZLxq2j8d8ia9kyA5XUDb8THcIT6HDGkRs3icCPtwmxwzDBYHSyroTJz8g/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 315px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">接下来,我们计算 ( F_1(x) ) 对训练数据集的残差:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZjrp2eDkduaVhVCSrOPufbds2ibaI0ep0laZDjfoz2Z3YjtIt4vpk70Q/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." 
data-s="300,640" data-type="webp" data-w="159" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZjrp2eDkduaVhVCSrOPufbds2ibaI0ep0laZDjfoz2Z3YjtIt4vpk70Q/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 159px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">然后我们训练另一个浅层决策树,以 x 为特征,以 ( y - p(x) ) 为目标。这个模型的预测记为 ( h_2(x) ),将其添加到 ( F_1(x) ) 中:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZPuEarhoterhIKkbffpo87D7W8Ju9kYjeU6uS7XwP4h7kz2qdHMgORg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0." data-s="300,640" data-type="webp" data-w="431" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZPuEarhoterhIKkbffpo87D7W8Ju9kYjeU6uS7XwP4h7kz2qdHMgORg/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 431px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">这个过程可以重复 M 次。每次都从前一个模型中计算训练数据集的残差,并训练决策树 ( h_m ) 来预测这些残差。因此,( h_m ) 以 x 为特征,以 ( y - p(x) ) 为目标。然后将这棵树的预测添加到前一个模型的预测中以改进它:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZUEnpuIw5aubmCzKt7yBhDZyFjyicd4YxUkawWyS9icvbzfy0ib08RKsibQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0.082704" data-s="300,640" data-type="webp" data-w="503" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZUEnpuIw5aubmCzKt7yBhDZyFjyicd4YxUkawWyS9icvbzfy0ib08RKsibQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" style="outline: 0px;visibility: visible !important;width: 503px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">简化这个递归方程,得到提升模型的最终预测如下:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZpW93X3UqbhRFiccT08iczsNay2VcWOpUTXlaDfenz8KWZcJ04Ayw6dzQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0.72727" data-s="300,640" data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZpW93X3UqbhRFiccT08iczsNay2VcWOpUTXlaDfenz8KWZcJ04Ayw6dzQ/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="528" style="outline: 0px;visibility: visible !important;width: 528px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">这与梯度提升回归器的方程完全相同。这个方程给出了特征 ( x ) 的预测赔率的对数,它也可以用于不在训练数据集中的未见特征 ( x )。目标为1的预测概率由以下公式给出:</span></section><section style="line-height: 1.5em;"><img src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZKax7RGIR6xcTvaUokI3wtQLsve09DnB2zXfHhGhBkNIRkkiaFcPRUkw/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" class="rich_pages wxw-img js_insertlocalimg" data-imgfileid="" data-ratio="0.80243" data-s="300,640" 
data-src="https://mmbiz.qpic.cn/mmbiz_jpg/6wQyVOrkRNKyz6cLngiaDKBnwykmibquOZKax7RGIR6xcTvaUokI3wtQLsve09DnB2zXfHhGhBkNIRkkiaFcPRUkw/640?wx_fmt=jpeg&from=appmsg&tp=wxpic&wxfrom=5&wx_lazy=1&wx_co=1" data-type="webp" data-w="329" style="outline: 0px;visibility: visible !important;width: 329px !important;" /></section><section style="line-height: 1.5em;"><br /></section><section style="line-height: 1.5em;"><span style="font-size: 15px;">我们可以将这个概率与一个阈值进行比较,以获得二元目标的最终预测。这个阈值通常为0.5。如果 ( p(x) \geq 0.5 ),则预测目标为1,否则为0。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><section style="line-height: 1.5em;text-align: center;"><span style="color: rgb(166, 91, 203);"><strong><span style="font-size: 15px;">下面就是算法的python实现</span></strong></span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><pre spellcheck="false" cid="n302" mdtype="fences" data-style="overflow: visible; font-size: 0.85rem; break-inside: avoid; text-align: left; white-space: normal; background-image: inherit; background-position: inherit; background-size: inherit; background-repeat: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; width: inherit; border-width: 1px; border-style: solid; border-color: rgb(244, 244, 244); -webkit-font-smoothing: initial; line-height: 1.43rem; border-radius: 2px; overflow-wrap: normal; color: rgb(82, 97, 106); letter-spacing: normal; font-family: "PingFang SC", "Lantinghei SC", "Microsoft Yahei", "Hiragino Sans GB", "Microsoft Sans Serif", "WenQuanYi Micro Hei", sans !important; background-color: rgb(240, 245, 249); padding: 0.3rem 0rem !important; margin-top: 0.8rem !important; margin-bottom: 0.8rem !important;" class="js_darkmode__37"><section style="line-height: 1.5em;"><span style="font-size: 15px;"></span></section></pre><section class="code-snippet__fix code-snippet__js"><ul class="code-snippet__line-index code-snippet__js"><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li></ul><pre class="code-snippet__js" data-lang="objectivec"><code><span class="code-snippet_outer"><span class="code-snippet__keyword">class</span> GradBoostingClassifier(): </span></code><code><span class="code-snippet_outer"> def __init__(<span class="code-snippet__keyword">self</span>, num_estimators, learning_rate, max_depth=<span class="code-snippet__number">1</span>):</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">self</span>.num_estimators = num_estimators</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">self</span>.learning_rate = learning_rate</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">self</span>.max_depth = max_depth</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">self</span>.tree_list = []</span></code><code><span class="code-snippet_outer"> def fit(<span class="code-snippet__keyword">self</span>, X, y): </span></code><code><span class="code-snippet_outer"> probability = y.mean()</span></code><code><span class="code-snippet_outer"> log_of_odds = np.log(probability / (<span 
class="code-snippet__number">1</span> - probability))</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">self</span>.F0 = log_of_odds</span></code><code><span class="code-snippet_outer"> Fm = np.array([log_of_odds]*len(y))</span></code><code><span class="code-snippet_outer"> probs = np.array([probability]*len(y))</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">for</span> i <span class="code-snippet__keyword">in</span> range(<span class="code-snippet__keyword">self</span>.num_estimators):</span></code><code><span class="code-snippet_outer"> residuals = y - probs</span></code><code><span class="code-snippet_outer"> tree_reg = DecisionTreeRegressor(max_depth=<span class="code-snippet__keyword">self</span>.max_depth)</span></code><code><span class="code-snippet_outer"> tree_reg.fit(X, residuals)</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__meta"># Correcting leaf vlaues</span></span></code><code><span class="code-snippet_outer"> h = probs * (<span class="code-snippet__number">1</span> - probs)</span></code><code><span class="code-snippet_outer"> leaf_nodes = np.nonzero(tree_reg.tree_ .children_left == <span class="code-snippet__number">-1</span>)[<span class="code-snippet__number">0</span>]</span></code><code><span class="code-snippet_outer"> leaf_node_for_each_sample = tree_reg.apply(X)</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">for</span> leaf <span class="code-snippet__keyword">in</span> leaf_nodes:</span></code><code><span class="code-snippet_outer"> leaf_samples = np.where(leaf_node_for_each_sample == leaf)[<span class="code-snippet__number">0</span>]</span></code><code><span class="code-snippet_outer"> residuals_in_leaf = residuals.take(leaf_samples, axis=<span class="code-snippet__number">0</span>)</span></code><code><span class="code-snippet_outer"> h_in_leaf = h.take(leaf_samples, axis=<span class="code-snippet__number">0</span>)</span></code><code><span class="code-snippet_outer"> value = np.sum(residuals_in_leaf) / np.sum(h_in_leaf)</span></code><code><span class="code-snippet_outer"> tree_reg.tree_.value[leaf, <span class="code-snippet__number">0</span>, <span class="code-snippet__number">0</span>] = value</span></code><code><span class="code-snippet_outer"> </span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">self</span>.tree_list.append(tree_reg)</span></code><code><span class="code-snippet_outer"> reg_pred = tree_reg.predict(X)</span></code><code><span class="code-snippet_outer"> Fm += <span class="code-snippet__keyword">self</span>.learning_rate * reg_pred</span></code><code><span class="code-snippet_outer"> probs = np.exp(Fm) / (<span class="code-snippet__number">1</span>+ np.exp(Fm))</span></code><code><span class="code-snippet_outer"> </span></code><code><span class="code-snippet_outer"> def predict_proba(<span class="code-snippet__keyword">self</span>, X):</span></code><code><span class="code-snippet_outer"> FM = <span class="code-snippet__keyword">self</span>.F0 + <span class="code-snippet__keyword">self</span>.learning_rate * \</span></code><code><span class="code-snippet_outer"> np.sum([t.predict(X) <span class="code-snippet__keyword">for</span> t <span class="code-snippet__keyword">in</span> <span class="code-snippet__keyword">self</span>.tree_list], axis=<span class="code-snippet__number">0</span>)</span></code><code><span class="code-snippet_outer"> prob = np.exp(FM) / 
(<span class="code-snippet__number">1</span>+ np.exp(FM))</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">return</span> prob</span></code><code><span class="code-snippet_outer"> </span></code><code><span class="code-snippet_outer"> def predict(<span class="code-snippet__keyword">self</span>, X):</span></code><code><span class="code-snippet_outer"> yhat = (<span class="code-snippet__keyword">self</span>.predict_proba(X) >= <span class="code-snippet__number">0.5</span>).astype(<span class="code-snippet__keyword">int</span>)</span></code><code><span class="code-snippet_outer"> <span class="code-snippet__keyword">return</span> yhat</span></code></pre></section><pre spellcheck="false" cid="n302" mdtype="fences" data-style="overflow: visible; font-size: 0.85rem; break-inside: avoid; text-align: left; white-space: normal; background-image: inherit; background-position: inherit; background-size: inherit; background-repeat: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; width: inherit; border-width: 1px; border-style: solid; border-color: rgb(244, 244, 244); -webkit-font-smoothing: initial; line-height: 1.43rem; border-radius: 2px; overflow-wrap: normal; color: rgb(82, 97, 106); letter-spacing: normal; font-family: "PingFang SC", "Lantinghei SC", "Microsoft Yahei", "Hiragino Sans GB", "Microsoft Sans Serif", "WenQuanYi Micro Hei", sans !important; background-color: rgb(240, 245, 249); padding: 0.3rem 0rem !important; margin-top: 0.8rem !important; margin-bottom: 0.8rem !important;" class="js_darkmode__37"><span style="display: none;line-height: 0px;"></span><span style="font-size: 15px;"><br /></span></pre><section style="line-height: 1.5em;"><span style="font-size: 15px;">类中的函数 predict_proba() 返回 ( p(x) ),而函数 predict() 返回预测的二元目标。该类接收一个参数 num_estimators,其中包括 ( F_0 )。如果我们有 M 棵树,那么 num_estimators 应为 ( M+1 )。现在我们将梯度提升分类器拟合到这个数据集上。</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><pre spellcheck="false" cid="n341" mdtype="fences" data-style="overflow: visible; font-size: 0.85rem; break-inside: avoid; text-align: left; white-space: normal; background-image: inherit; background-position: inherit; background-size: inherit; background-repeat: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; width: inherit; border-width: 1px; border-style: solid; border-color: rgb(244, 244, 244); -webkit-font-smoothing: initial; line-height: 1.43rem; border-radius: 2px; overflow-wrap: normal; color: rgb(82, 97, 106); letter-spacing: normal; font-family: "PingFang SC", "Lantinghei SC", "Microsoft Yahei", "Hiragino Sans GB", "Microsoft Sans Serif", "WenQuanYi Micro Hei", sans !important; background-color: rgb(240, 245, 249); padding: 0.3rem 0rem !important; margin-top: 0.8rem !important; margin-bottom: 0.8rem !important;" class="js_darkmode__42"><section style="line-height: 1.5em;"><span style="font-size: 15px;"></span></section></pre><section class="code-snippet__fix code-snippet__js"><ul class="code-snippet__line-index code-snippet__js"><li></li><li></li><li></li></ul><pre class="code-snippet__js" data-lang="nginx"><code><span class="code-snippet_outer"> <span class="code-snippet__attribute">gbm_clf</span> = GradBoostingClassifier(num_estimators=<span class="code-snippet__number">30</span>,</span></code><code><span class="code-snippet_outer"> learning_rate=<span class="code-snippet__number">0</span>.<span 
class="code-snippet__number">1</span>, max_depth=<span class="code-snippet__number">1</span>)</span></code><code><span class="code-snippet_outer"> gbm_clf.fit(X, y)</span></code></pre></section><pre spellcheck="false" cid="n341" mdtype="fences" data-style="overflow: visible; font-size: 0.85rem; break-inside: avoid; text-align: left; white-space: normal; background-image: inherit; background-position: inherit; background-size: inherit; background-repeat: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; width: inherit; border-width: 1px; border-style: solid; border-color: rgb(244, 244, 244); -webkit-font-smoothing: initial; line-height: 1.43rem; border-radius: 2px; overflow-wrap: normal; color: rgb(82, 97, 106); letter-spacing: normal; font-family: "PingFang SC", "Lantinghei SC", "Microsoft Yahei", "Hiragino Sans GB", "Microsoft Sans Serif", "WenQuanYi Micro Hei", sans !important; background-color: rgb(240, 245, 249); padding: 0.3rem 0rem !important; margin-top: 0.8rem !important; margin-bottom: 0.8rem !important;" class="js_darkmode__42"><span style="display: none;line-height: 0px;"></span><span style="font-size: 15px;"><br /></span></pre><section style="line-height: 1.5em;"><span style="font-size: 15px;">结果如下:</span></section><section style="line-height: 1.5em;"><span style="font-size: 15px;"><br /></span></section><pre spellcheck="false" cid="n346" mdtype="fences" data-style="overflow: visible; font-size: 0.85rem; break-inside: avoid; text-align: left; white-space: normal; background-image: inherit; background-position: inherit; background-size: inherit; background-repeat: inherit; background-attachment: inherit; background-origin: inherit; background-clip: inherit; width: inherit; border-width: 1px; border-style: solid; border-color: rgb(244, 244, 244); -webkit-font-smoothing: initial; line-height: 1.43rem; border-radius: 2px; overflow-wrap: normal; color: rgb(82, 97, 106); letter-spacing: normal; font-family: "PingFang SC", "Lantinghei SC", "Microsoft Yahei", "Hiragino Sans GB", "Microsoft Sans Serif", "WenQuanYi Micro Hei", sans !important; background-color: rgb(240, 245, 249); padding: 0.3rem 0rem !important; margin-top: 0.8rem !important; margin-bottom: 0.8rem !important;" class="js_darkmode__43"><section style="line-height: 1.5em;"><span style="font-size: 15px;"></span></section></pre><section class="code-snippet__fix code-snippet__js"><ul class="code-snippet__line-index code-snippet__js"><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li><li></li></ul><pre class="code-snippet__js" data-lang="python"><code><span class="code-snippet_outer"> plt.figure(figsize=(<span class="code-snippet__number">8</span>, <span class="code-snippet__number">8</span>))</span></code><code><span class="code-snippet_outer"> plot_boundary(X, y, gbm_clf, lims=[<span class="code-snippet__number">-1</span>, <span class="code-snippet__number">5</span>, <span class="code-snippet__number">-1</span>, <span class="code-snippet__number">5</span>])</span></code><code><span class="code-snippet_outer"> plt.axvline(x=<span class="code-snippet__number">1.8</span>, color=<span class="code-snippet__string">"black"</span>, linestyle=<span class="code-snippet__string">"--"</span>, label=<span class="code-snippet__string">"Actual boundary"</span>)</span></code><code><span class="code-snippet_outer"> plt.text(<span class="code-snippet__number">0</span>, <span class="code-snippet__number">-0.3</span>, <span 
class="code-snippet__string">r"\)\hat{y}=0\("</span>, fontsize=<span class="code-snippet__number">15</span>)</span></code><code><span class="code-snippet_outer"> plt.text(<span class="code-snippet__number">3</span>, <span class="code-snippet__number">-0.3</span>, <span class="code-snippet__string">r"\)\hat{y}=1\("</span>, fontsize=<span class="code-snippet__number">15</span>)</span></code><code><span class="code-snippet_outer"> ax = plt.gca() </span></code><code><span class="code-snippet_outer"> ax.set_aspect(<span class="code-snippet__string">'equal'</span>)</span></code><code><span class="code-snippet_outer"> plt.xlim([<span class="code-snippet__number">-0.5</span>, <span class="code-snippet__number">4.5</span>])</span></code><code><span class="code-snippet_outer"> plt.ylim([<span class="code-snippet__number">-0.5</span>, <span class="code-snippet__number">4.6</span>])</span></code><code><span class="code-snippet_outer"> plt.xlabel(<span class="code-snippet__string">'\)x_1\('</span>, fontsize=<span class="code-snippet__number">18</span>)</span></code><code><span class="code-snippet_outer"> plt.ylabel(<span class="code-snippet__string">'\)x_2$’, fontsize=18)plt.legend(loc=“best”, fontsize=14)plt.show()![]()
Even with 29 trees in the ensemble, the model still does not overfit and predicts the boundary correctly. We can also use the GradientBoostingClassifier class from the scikit-learn library for gradient boosting classification, and we use it here to test our implementation:
from sklearn.ensemble import GradientBoostingClassifier
gbm_clf_sklrean = GradientBoostingClassifier(n_estimators=30,
                                             learning_rate=0.1,
                                             max_depth=1)
gbm_clf_sklrean.fit(X, y)
phat_sklrean = gbm_clf_sklrean.predict_proba(X)[:, 1]
phat = gbm_clf.predict_proba(X)
np.allclose(phat, phat_sklrean)

In this article we tried to explain decision trees visually. A decision tree is a non-parametric model made up of a number of nodes. Each node is essentially a linear classifier, but combined together the nodes can learn any non-linear pattern in a dataset. This flexibility comes at the cost of overfitting: when the tree grows too large, it starts to learn the noise in the data points.
Gradient boosting is an ensemble method made up of a sequence of weak decision trees, each of which tries to improve the predictions of the one before it. Gradient boosting keeps the flexibility of decision trees while being more robust to overfitting.