代码:
 
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
# 1. Estimate the mean and variance of the training-set features.
def estimateGaussianl(X, isCovariance):
    """Estimate Gaussian parameters from the rows of ``X``.

    Args:
        X: 2-D array with one sample per row and one feature per column.
        isCovariance: if truthy, return the full covariance matrix;
            otherwise return the per-feature variances.

    Returns:
        ``(means, sigma2)``. ``means`` is the column-wise mean of ``X``.
        ``sigma2`` is either the (n, n) covariance matrix or the (n,)
        variance vector, both normalised by ``len(X)`` (not ``len(X) - 1``).
    """
    means = np.mean(X, axis=0)
    if isCovariance:
        centered = X - means
        sigma2 = centered.T @ centered / len(X)
    else:
        sigma2 = np.var(X, axis=0)
    return means, sigma2


# 2. Multivariate normal density function.
def gaussian(X, means, sigma2):
    """Evaluate the multivariate normal density at every row of ``X``.

    Args:
        X: 2-D array of points, one per row.
        means: mean vector of the distribution.
        sigma2: either a 1-D vector of per-feature variances (expanded to a
            diagonal covariance matrix) or a full 2-D covariance matrix.

    Returns:
        Column vector of shape (m, 1) holding the density of each row.
    """
    if np.ndim(sigma2) == 1:
        # Promote the variance vector to a diagonal covariance matrix.
        sigma2 = np.diag(sigma2)
    centered = X - means
    n = centered.shape[1]
    # Normalisation constant (a scalar).
    first = np.power(2 * np.pi, -n / 2) * (np.linalg.det(sigma2) ** (-0.5))
    # Row-wise quadratic form x^T Sigma^-1 x. Summing the element-wise
    # product avoids materialising the m x m matrix that
    # np.diag(A @ inv @ A.T) would build just to read its diagonal.
    second = np.sum((centered @ np.linalg.inv(sigma2)) * centered, axis=1)
    p = first * np.exp(-0.5 * second)
    return p.reshape(-1, 1)  # column vector


# 3. Plotting.
def plotGaussian(X, means, sigma2):
    """Plot the samples in ``X`` with density contours drawn over them.

    The density is evaluated on a fixed grid covering [0, 30) x [0, 30) in
    steps of 0.5, and contours are drawn at 10**-20, 10**-17, ..., 10**-2.
    Drawing happens on the current matplotlib figure; the caller decides
    when to call ``plt.show()``.
    """
    x = np.arange(0, 30, 0.5)
    y = np.arange(0, 30, 0.5)
    xx, yy = np.meshgrid(x, y)
    # Evaluate the Gaussian density at every grid point.
    z = gaussian(np.c_[xx.ravel(), yy.ravel()], means, sigma2)
    zz = z.reshape(xx.shape)
    plt.plot(X[:, 0], X[:, 1], 'bx')
    contour_levels = [10 ** h for h in range(-20, 0, 3)]
    plt.contour(xx, yy, zz, contour_levels)


# 4. Threshold selection.
def selectThreshold(yval, p):
    """Pick the density threshold that maximises F1 on a labelled set.

    Args:
        yval: ground-truth labels (1 = anomaly, 0 = normal), any shape with
            one entry per sample.
        p: density of each sample, same number of entries as ``yval``.

    Returns:
        ``(bestEpsilon, bestF1)``. A sample is flagged as anomalous when its
        density is strictly below the threshold. If no candidate achieves a
        positive F1, ``(0, 0)`` is returned.
    """
    # Flatten both inputs so a (m, 1) column compared against a (m,) vector
    # cannot silently broadcast into an m x m matrix and corrupt the counts.
    labels = np.ravel(yval)
    probs = np.ravel(p)

    bestEpsilon = 0
    bestF1 = 0
    # Candidate thresholds spanning the observed density range.
    epsilons = np.linspace(probs.min(), probs.max(), 1000)
    for e in epsilons:
        flagged = probs < e
        tp = np.sum((labels == 1) & flagged)
        fp = np.sum((labels == 0) & flagged)
        fn = np.sum((labels == 1) & ~flagged)
        # Guard every ratio against a zero denominator.
        prec = tp / (tp + fp) if (tp + fp) else 0
        rec = tp / (tp + fn) if (tp + fn) else 0
        F1_e = 2 * prec * rec / (prec + rec) if (prec + rec) else 0
        if F1_e > bestF1:
            bestF1 = F1_e
            bestEpsilon = e
    return bestEpsilon, bestF1


def main():
    """Run the anomaly-detection demo on ``ex8data1.mat``."""
    mat = sio.loadmat('ex8data1.mat')
    print(mat.keys())  # X Xval yval
    # Shapes per the original author's notes: X (307, 2), Xval (307, 2),
    # yval (307, 1) -- not verified here.
    X = mat['X']
    Xval, yval = mat['Xval'], mat['yval']

    # Plot the raw data.
    plt.plot(X[:, 0], X[:, 1], 'bx')
    plt.show()

    # Fit the per-feature Gaussian once and reuse it below.
    means, sigma2 = estimateGaussianl(X, isCovariance=False)
    plotGaussian(X, means, sigma2)
    plt.show()
    print(means, sigma2)

    pval = gaussian(Xval, means, sigma2)
    bestEpsilon, bestF1 = selectThreshold(yval, pval)

    # Find the anomalies. The boolean mask keeps `anoms` two-dimensional
    # even when no point is flagged, so the column indexing below is safe.
    p = gaussian(X, means, sigma2)
    anoms = X[p.ravel() < bestEpsilon]
    plotGaussian(X, means, sigma2)
    plt.scatter(anoms[:, 0], anoms[:, 1], c='r', marker='o')
    plt.show()


if __name__ == "__main__":
    main()
 结果展示:

 

技术
今日推荐
PPT
阅读数 99
下载桌面版
GitHub
百度网盘(提取码:draw)
Gitee
云服务器优惠
阿里云优惠券
腾讯云优惠券
华为云优惠券
站点信息
问题反馈
邮箱:ixiaoyang8@qq.com
QQ群:766591547
关注微信