Implementing XGBoost with NumPy
from __future__ import division, print_function
import numpy as np
import progressbar
from mlfromscratch.utils import train_test_split, standardize, to_categorical, normalize
from mlfromscratch.utils import mean_squared_error, accuracy_score
from mlfromscratch.supervised_learning import XGBoostRegressionTree
from mlfromscratch.deep_learning.activation_functions import Sigmoid
from mlfromscratch.utils.misc import bar_widgets
from mlfromscratch.utils import Plot
class LogisticLoss():
    def __init__(self):
        sigmoid = Sigmoid()
        self.log_func = sigmoid
        self.log_grad = sigmoid.gradient

    def loss(self, y, y_pred):
        p = self.log_func(y_pred)
        # Clip the probability (not the raw score) to avoid log(0)
        p = np.clip(p, 1e-15, 1 - 1e-15)
        # Negative log-likelihood of the Bernoulli model
        return -(y * np.log(p) + (1 - y) * np.log(1 - p))

    # gradient w.r.t y_pred
    def gradient(self, y, y_pred):
        p = self.log_func(y_pred)
        return -(y - p)

    # hessian w.r.t y_pred
    def hess(self, y, y_pred):
        p = self.log_func(y_pred)
        return p * (1 - p)
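
# Derivation note (added for clarity): with p = sigmoid(y_pred) and the negative
# log-likelihood L = -(y*log(p) + (1-y)*log(1-p)), the chain rule gives
# dL/dy_pred = p - y = -(y - p) and d2L/dy_pred2 = p * (1 - p), which is exactly
# what gradient() and hess() return. XGBoost plugs these first- and second-order
# terms into its Taylor expansion of the loss when scoring splits and leaf weights.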
class XGBoost(object):
    """The XGBoost classifier.
    Reference: http://xgboost.readthedocs.io/en/latest/model.html

    Parameters:
    -----------
    n_estimators: int
        The number of classification trees that are used.
    learning_rate: float
        The step length that will be taken when following the negative gradient during
        training.
    min_samples_split: int
        The minimum number of samples needed to make a split when building a tree.
    min_impurity: float
        The minimum impurity required to split the tree further.
    max_depth: int
        The maximum depth of a tree.
    """
    def __init__(self, n_estimators=200, learning_rate=0.001, min_samples_split=2,
                 min_impurity=1e-7, max_depth=2):
        self.n_estimators = n_estimators            # Number of trees
        self.learning_rate = learning_rate          # Step size for weight update
        self.min_samples_split = min_samples_split  # The minimum n of samples to justify split
        self.min_impurity = min_impurity            # Minimum variance reduction to continue
        self.max_depth = max_depth                  # Maximum depth for tree
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Log loss for classification
        self.loss = LogisticLoss()

        # Initialize regression trees
        self.trees = []
        for _ in range(n_estimators):
            tree = XGBoostRegressionTree(
                min_samples_split=self.min_samples_split,
                min_impurity=min_impurity,
                max_depth=self.max_depth,
                loss=self.loss)
            self.trees.append(tree)
    def fit(self, X, y):
        y = to_categorical(y)
        y_pred = np.zeros(np.shape(y))
        for i in self.bar(range(self.n_estimators)):
            tree = self.trees[i]
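            # Stack the targets and the current predictions column-wise so the
            # regression tree can recover both halves internally when computing
            # the gradient/hessian statistics for split gains and leaf weights.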
            y_and_pred = np.concatenate((y, y_pred), axis=1)
            tree.fit(X, y_and_pred)
            update_pred = tree.predict(X)
            y_pred -= np.multiply(self.learning_rate, update_pred)
    def predict(self, X):
        y_pred = None
        # Make predictions
        for tree in self.trees:
            # Estimate gradient and update prediction
            update_pred = tree.predict(X)
            if y_pred is None:
                y_pred = np.zeros_like(update_pred)
            y_pred -= np.multiply(self.learning_rate, update_pred)

        # Turn into probability distribution (Softmax)
        y_pred = np.exp(y_pred) / np.sum(np.exp(y_pred), axis=1, keepdims=True)
        # Set label to the value that maximizes probability
        y_pred = np.argmax(y_pred, axis=1)
        return y_pred
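
With the class defined, the snippet below is a minimal usage sketch modeled on the example script in the mlfromscratch repository. It assumes scikit-learn is available for its bundled Iris dataset and reuses the train_test_split and accuracy_score helpers imported at the top of the listing.

from sklearn import datasets  # assumed available; only used to load Iris

def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target
    # Hold out 40% of the samples for evaluation
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = XGBoost()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))

if __name__ == "__main__":
    main()

Note that with the small default learning_rate of 0.001, the 200 boosting rounds make only modest updates to the raw scores; raising the learning rate is a common first knob to turn if accuracy is poor.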
Source: wanghao.blog.csdn.net, author: AI浩. Copyright belongs to the original author; contact the author for reprint permission.
Original link: wanghao.blog.csdn.net/article/details/121558375