第一个例子来自scikit-learn官网,数据是假设的。
# Example 1: fit a Gaussian naive Bayes classifier on six hand-made 2-D points
# and classify one new point.
import numpy as np
from sklearn.naive_bayes import GaussianNB

# Three points with both coordinates negative, then three with both positive.
X = np.array([
    [-1, -1],
    [-2, -1],
    [-3, -2],
    [1, 1],
    [2, 1],
    [3, 2],
])
# One class label per row of X.
Y = np.array([1, 1, 1, 2, 2, 2])

# fit() returns the estimator itself, so construction and training can be chained.
clf = GaussianNB().fit(X, Y)

# Predict the label of a single unseen point and print the resulting array.
query = [[-0.8, -1]]
print(clf.predict(query))
第二个例子也来自scikit-learn官网,使用鸢尾花(iris)分类数据:共150个样本,分为3个类别,每个类别50个样本。程序先用全部数据进行训练,再用同样的全部数据进行测试,然后比较预测结果与真实标签。运行后准确率达到96%。
# Example 2: train Gaussian naive Bayes on the whole iris data set and
# evaluate it on those same samples.
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

iris = datasets.load_iris()

# Show the raw inputs and their labels before training.
print(iris.data)
print(iris.target)

gnb = GaussianNB()
gnb.fit(iris.data, iris.target)
y_pred = gnb.predict(iris.data)

# Report how many predictions disagree with the true labels.
n_samples = iris.data.shape[0]
n_wrong = (iris.target != y_pred).sum()
print("Number of mislabeled points out of a total %d points : %d" % (n_samples, n_wrong))
第三个例子由我在第二个例子的基础上修改而成:每个类别取前30个样本(共60%)作为训练数据,其余20个样本(共40%)作为测试数据,最终在测试数据上的准确率达到96.7%。
# Example 3: split iris by hand into training and test parts, train on the
# first part, and count the mistakes on the second.
import numpy as np
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB

iris = datasets.load_iris()

# Row ranges taken from each of the three consecutive 50-row groups:
# the first 30 rows of a group go to training, the remaining 20 to testing.
train_rows = (slice(None, 30), slice(50, 80), slice(100, 130))
test_rows = (slice(30, 50), slice(80, 100), slice(130, None))

# Features are stacked row-wise; labels are joined end to end.
train = np.vstack([iris.data[rows] for rows in train_rows])
test = np.vstack([iris.data[rows] for rows in test_rows])
trainTarget = np.hstack([iris.target[rows] for rows in train_rows])
testTarget = np.hstack([iris.target[rows] for rows in test_rows])

# Sanity check: print the number of rows in each of the four pieces.
for part in (train, test, trainTarget, testTarget):
    print(part.shape[0])

gnb = GaussianNB()
gnb.fit(train, trainTarget)
y_pred = gnb.predict(test)

n_test = test.shape[0]
n_wrong = (testTarget != y_pred).sum()
print("Number of mislabeled points out of a total %d points : %d" % (n_test, n_wrong))
第四个例子改用train_test_split函数随机划分训练数据和测试数据(测试集占40%)。
# Example 4: let scikit-learn split iris into training and test sets, then
# train Gaussian naive Bayes and report both the error count and the score.
from sklearn import datasets
# These two imports were missing from the snippet; without them the calls to
# train_test_split and GaussianNB below raise NameError when it runs alone.
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 40% of the samples for testing. No random_state is given, so the
# split (and therefore the printed numbers) can differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)
# score() evaluates the fitted model on the held-out data.
score = gnb.score(X_test, y_test)
print("Number of mislabeled points out of a total %d points : %d, score: %f" % (X_test.shape[0],(y_test != y_pred).sum(),score))
# Repeat of the train_test_split example: split iris into training and test
# sets, train Gaussian naive Bayes, and report the error count and the score.
from sklearn import datasets
# These two imports were missing from the snippet; without them the calls to
# train_test_split and GaussianNB below raise NameError when it runs alone.
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 40% of the samples for testing. No random_state is given, so the
# split (and therefore the printed numbers) can differ from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4)

gnb = GaussianNB()
y_pred = gnb.fit(X_train, y_train).predict(X_test)
# score() evaluates the fitted model on the held-out data.
score = gnb.score(X_test, y_test)
print("Number of mislabeled points out of a total %d points : %d, score: %f" % (X_test.shape[0],(y_test != y_pred).sum(),score))
No comments:
Post a Comment