NumPy Implementation of NaiveBayes (Naive Bayes)
【Abstract】A from-scratch Gaussian Naive Bayes classifier built on NumPy: fit learns the per-class mean and variance of each feature, and prediction picks the class that maximizes posterior = likelihood * prior under the naive (conditional independence) assumption.
from __future__ import division, print_function
import numpy as np
import math
# Helper utilities from the ML-From-Scratch project (used by the original demo)
from mlfromscratch.utils import train_test_split, normalize
from mlfromscratch.utils import Plot, accuracy_score
class NaiveBayes():
    """The Gaussian Naive Bayes classifier."""
    def fit(self, X, y):
        self.X, self.y = X, y
        self.classes = np.unique(y)
        self.parameters = []
        # Calculate the mean and variance of each feature for each class
        for i, c in enumerate(self.classes):
            # Only select the rows where the label equals the given class
            X_where_c = X[np.where(y == c)]
            self.parameters.append([])
            # Add the mean and variance of each feature (column)
            for col in X_where_c.T:
                parameters = {"mean": col.mean(), "var": col.var()}
                self.parameters[i].append(parameters)
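    # After fit, self.parameters[i] holds one {"mean": ..., "var": ...}
    # dict per feature column for class self.classes[i]; e.g. a dataset
    # with 2 classes and 3 features yields a 2 x 3 nested list of dicts.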
    def _calculate_likelihood(self, mean, var, x):
        """ Gaussian likelihood of the data x given mean and var """
        eps = 1e-4  # Added in denominator to prevent division by zero
        coeff = 1.0 / math.sqrt(2.0 * math.pi * var + eps)
        exponent = math.exp(-(math.pow(x - mean, 2) / (2 * var + eps)))
        return coeff * exponent
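    # For reference, this evaluates the univariate Gaussian density
    #   N(x; mean, var) = exp(-(x - mean)^2 / (2 * var)) / sqrt(2 * pi * var)
    # with eps folded into the variance terms to guard against var == 0.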
    def _calculate_prior(self, c):
        """ Calculate the prior of class c
        (samples where class == c / total number of samples) """
        frequency = np.mean(self.y == c)
        return frequency
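    # Example: with y = [0, 0, 1], _calculate_prior(0) returns 2/3 and
    # _calculate_prior(1) returns 1/3, i.e. the empirical class frequency.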
    def _classify(self, sample):
        """ Classification using Bayes' rule P(Y|X) = P(X|Y)*P(Y)/P(X),
        or Posterior = Likelihood * Prior / Scaling Factor

        P(Y|X) - The posterior: the probability that the sample belongs to
            class y, given that its feature values are distributed according
            to the feature distributions of class y, and the prior.
        P(X|Y) - Likelihood of data X given class Y, modeled as a Gaussian
            distribution (given by _calculate_likelihood).
        P(Y)   - Prior (given by _calculate_prior).
        P(X)   - Scales the posterior to make it a proper probability
            distribution. This term is ignored in this implementation since
            it does not affect which class the sample is most likely to
            belong to.

        Classifies the sample as the class that yields the largest
        posterior P(Y|X).
        """
        posteriors = []
        # Go through the list of classes
        for i, c in enumerate(self.classes):
            # Initialize the posterior as the prior
            posterior = self._calculate_prior(c)
            # Naive assumption (conditional independence):
            # P(x1,x2,x3|Y) = P(x1|Y)*P(x2|Y)*P(x3|Y)
            # The posterior is the product of the prior and the likelihoods
            # (ignoring the scaling factor)
            for feature_value, params in zip(sample, self.parameters[i]):
                # Likelihood of this feature value given the distribution
                # of feature values for class y
                likelihood = self._calculate_likelihood(params["mean"], params["var"], feature_value)
                posterior *= likelihood
            posteriors.append(posterior)
        # Return the class with the largest posterior probability
        return self.classes[np.argmax(posteriors)]
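    # Note (a common variant, not part of the original post): multiplying
    # many small likelihoods can underflow to 0.0 for high-dimensional data;
    # a standard fix accumulates log-probabilities instead, e.g.
    #   log_posterior = np.log(prior) + sum of np.log(likelihood) terms
    # and returns the class with the largest log-posterior.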
    def predict(self, X):
        """ Predict the class labels of the samples in X """
        y_pred = [self._classify(sample) for sample in X]
        return y_pred
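A minimal usage sketch (my addition, not from the original post; it uses synthetic two-class Gaussian data so it runs without the mlfromscratch helpers, assuming the unused mlfromscratch imports above are available or commented out):

rng = np.random.RandomState(0)
X0 = rng.normal(loc=0.0, scale=1.0, size=(50, 2))  # class 0 cluster
X1 = rng.normal(loc=3.0, scale=1.0, size=(50, 2))  # class 1 cluster
X = np.vstack([X0, X1])
y = np.array([0] * 50 + [1] * 50)

clf = NaiveBayes()
clf.fit(X, y)
y_pred = clf.predict(X)
print("Training accuracy:", np.mean(np.array(y_pred) == y))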
Source: wanghao.blog.csdn.net, author: AI浩. Copyright belongs to the original author; please contact the author before reprinting.
Original link: wanghao.blog.csdn.net/article/details/121558563