add code
This commit is contained in:
parent
645c43c9cb
commit
3ea2506fb6
|
@ -0,0 +1,183 @@
|
|||
# Straight-forward implementation of IVAP algorithm described in:
|
||||
# Large-scale probabilistic prediction with and without validity guarantees, Vovk et al.
|
||||
# https://arxiv.org/pdf/1511.00213.pdf
|
||||
#
|
||||
# Paolo Toccaceli
|
||||
#
|
||||
# https://github.com/ptocca/VennABERS
|
||||
#
|
||||
# 2020-07-09: Fixed bug in p0 calculation
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Some elementary functions to speak the same language as the paper
|
||||
# (at some point we'll just replace the occurrence of the calls with the function body itself)
|
||||
def push(x,stack):
    """Place ``x`` on top of ``stack`` (a plain list whose end is the top)."""
    stack.append(x)
|
||||
|
||||
|
||||
def pop(stack):
    """Remove the top element of ``stack`` and hand it back to the caller."""
    removed = stack.pop()
    return removed
|
||||
|
||||
|
||||
def top(stack):
    """Return the top element of ``stack`` without removing it."""
    newest = stack[-1]
    return newest
|
||||
|
||||
|
||||
def nextToTop(stack):
    """Return the element just below the top of ``stack`` without removing it."""
    below = stack[-2]
    return below
|
||||
|
||||
|
||||
# perhaps inefficient but clear implementation
|
||||
def nonleftTurn(a,b,c):
    """Return True when walking a -> b -> c goes straight on or turns right.

    ``a``, ``b`` and ``c`` are 2-element NumPy arrays (x, y).  The test is the
    sign of the z-component of the cross product of (b - a) and (c - b).
    """
    d1 = b-a
    d2 = c-b
    # Written out by hand: np.cross on 2-element vectors is deprecated since
    # NumPy 2.0, and the scalar determinant is all that is needed here.
    return d1[0]*d2[1] - d1[1]*d2[0] <= 0
|
||||
|
||||
|
||||
def nonrightTurn(a,b,c):
    """Return True when walking a -> b -> c goes straight on or turns left.

    ``a``, ``b`` and ``c`` are 2-element NumPy arrays (x, y).  The test is the
    sign of the z-component of the cross product of (b - a) and (c - b).
    """
    d1 = b-a
    d2 = c-b
    # Written out by hand: np.cross on 2-element vectors is deprecated since
    # NumPy 2.0, and the scalar determinant is all that is needed here.
    return d1[0]*d2[1] - d1[1]*d2[0] >= 0
|
||||
|
||||
|
||||
def slope(a,b):
    """Return the slope (rise over run) of the line through points ``a`` and ``b``.

    Both points are (x, y) pairs; no guard is applied for equal x values.
    """
    (x0,y0),(x1,y1) = a,b
    rise = y1-y0
    run = x1-x0
    return rise/run
|
||||
|
||||
|
||||
def notBelow(t,p1,p2):
    """Return True when point ``t`` lies on or above the line through ``p1`` and ``p2``.

    All three arguments are (x, y) pairs.  The line is put in the form
    y = m*x + b and evaluated at t's x coordinate.
    """
    x1,y1 = p1
    x2,y2 = p2
    tx,ty = t
    m = (y2-y1)/(x2-x1)              # gradient of the line
    b = (x2*y1 - x1*y2)/(x2-x1)      # intercept of the line
    lineAtT = tx*m+b
    return (ty >= lineAtT)
|
||||
|
||||
# Module-level state shared through ``global``: prepareData() sets it to the
# number of distinct calibration scores; algorithm1-4 and computeF() read it.
kPrime = None
|
||||
|
||||
# Because we cannot have negative indices in Python (they have another meaning), I use a dictionary
|
||||
|
||||
def algorithm1(P):
    """Build the initial point stack for the F1 computation.

    ``P`` is a dict mapping index -> 2-element array for indices 0..kPrime
    (``kPrime`` is the module-level global).  The sentinel P[-1] = (-1, -1)
    is added to ``P`` in place.  Points are scanned in increasing index and
    the stack top is discarded while it would form a non-left turn, so only
    strict left turns remain.  Presumably Algorithm 1 of the paper cited in
    the file header.

    Returns the stack as a list (top is the last element).
    """
    P[-1] = np.array((-1,-1))
    hull = [P[-1],P[0]]
    for idx in range(1,kPrime+1):
        candidate = P[idx]
        while len(hull)>1 and nonleftTurn(hull[-2],hull[-1],candidate):
            hull.pop()
        hull.append(candidate)
    return hull
|
||||
|
||||
|
||||
def algorithm2(P,S):
    """Compute the F1 values from the stack produced by algorithm1.

    ``S`` is reversed into a working stack.  For i = 1..kPrime the slope
    between the two topmost stack points is stored in F1[i]; P[i-1] is then
    replaced in place by P[i-2] + P[i] - P[i-1], and the stack is repaired
    only if that moved point falls below the line through its top two points.
    Presumably Algorithm 2 of the paper cited in the file header.

    Returns an array of length kPrime+1 (entry 0 stays 0).
    """
    work = S[::-1]     # reverse the stack

    F1 = np.zeros((kPrime+1,))
    for i in range(1,kPrime+1):
        F1[i] = slope(work[-1],work[-2])
        P[i-1] = P[i-2]+P[i]-P[i-1]
        moved = P[i-1]
        if not notBelow(moved,work[-1],work[-2]):
            work.pop()
            while len(work)>1 and nonleftTurn(moved,work[-1],work[-2]):
                work.pop()
            work.append(moved)
    return F1
|
||||
|
||||
|
||||
def algorithm3(P):
    """Build the initial point stack for the F0 computation.

    ``P`` maps index -> 2-element array for indices 0..kPrime+1.  Starting
    from P[kPrime+1] and P[kPrime], points are scanned in decreasing index
    (k'-1, k'-2, ..., 0) and the stack top is discarded while it would form a
    non-right turn.  Presumably Algorithm 3 of the paper cited in the file
    header.

    Returns the stack as a list (top is the last element).
    """
    hull = [P[kPrime+1],P[kPrime]]
    for idx in range(kPrime-1,-1,-1):
        candidate = P[idx]
        while len(hull)>1 and nonrightTurn(hull[-2],hull[-1],candidate):
            hull.pop()
        hull.append(candidate)
    return hull
|
||||
|
||||
|
||||
def algorithm4(P,S):
    """Compute the F0 values from the stack produced by algorithm3.

    ``S`` is reversed into a working stack.  For i = kPrime..1 the slope
    between the two topmost stack points is stored in F0[i]; P[i] is then
    replaced in place by P[i-1] + P[i+1] - P[i], and the stack is repaired
    only if that moved point falls below the line through its top two points.
    Presumably Algorithm 4 of the paper cited in the file header.

    Returns an array of length kPrime+1 (entry 0 stays 0).
    """
    work = S[::-1]     # reverse the stack

    F0 = np.zeros((kPrime+1,))
    for i in range(kPrime,0,-1):    # k',k'-1,...,1
        F0[i] = slope(work[-1],work[-2])
        P[i] = P[i-1]+P[i+1]-P[i]
        moved = P[i]
        if not notBelow(moved,work[-1],work[-2]):
            work.pop()
            while len(work)>1 and nonrightTurn(moved,work[-1],work[-2]):
                work.pop()
            work.append(moved)
    return F0
|
||||
|
||||
|
||||
def prepareData(calibrPoints):
    """Collapse calibration points into per-score aggregates.

    ``calibrPoints`` is an iterable of (score, label) pairs.  The points are
    sorted and grouped by distinct score.  As a side effect the module-level
    ``kPrime`` is set to the number of distinct scores.

    Returns (yPrime, yCsd, xPrime, ptsUnique):
      yPrime    - mean label for each distinct score
      yCsd      - cumulative sum of the labels over the distinct scores
      xPrime    - cumulative count of points over the distinct scores
      ptsUnique - the distinct scores in ascending order
    """
    global kPrime

    ordered = sorted(calibrPoints)
    scores = np.fromiter((pt[0] for pt in ordered),float)
    labels = np.fromiter((pt[1] for pt in ordered),float)

    # np.unique always returns (values, first index, inverse, counts) in this order.
    ptsUnique,_firstIdx,groupOf,counts = np.unique(scores,
                                                  return_index=True,
                                                  return_counts=True,
                                                  return_inverse=True)

    # Sum the labels of all points sharing the same score.
    labelSums = np.zeros(ptsUnique.shape)
    np.add.at(labelSums,groupOf,labels)

    yPrime = labelSums/counts
    yCsd = np.cumsum(counts*yPrime)
    xPrime = np.cumsum(counts)
    kPrime = len(xPrime)

    return yPrime,yCsd,xPrime,ptsUnique
|
||||
|
||||
|
||||
def computeF(xPrime,yCsd):
    """Run algorithm1-4 on the cumulative-sum points and return (F0, F1).

    The point dictionary starts with P[0] = (0, 0) followed by
    P[i+1] = (xPrime[i], yCsd[i]).  It is rebuilt before the F0 pass because
    algorithm1/algorithm2 modify it in place.
    """
    def buildPoints():
        # Fresh index -> point mapping for one pass.
        pts = {0:np.array((0,0))}
        for idx,(x,y) in enumerate(zip(xPrime,yCsd)):
            pts[idx+1] = np.array((x,y))
        return pts

    P = buildPoints()
    F1 = algorithm2(P,algorithm1(P))

    P = buildPoints()
    P[kPrime+1] = P[kPrime] + np.array((1.0,0.0))    # The paper says (1,1)
    F0 = algorithm4(P,algorithm3(P))

    return F0,F1
|
||||
|
||||
|
||||
def getFVal(F0,F1,ptsUnique,testObjects):
    """Look up the F0 and F1 entries that apply to each test score.

    Positions come from binary search in the sorted distinct calibration
    scores: left insertion point in ``ptsUnique`` for F0, and right insertion
    point in ``ptsUnique[:-1]`` plus one for F1.

    Returns (F0 values, F1 values), one entry per element of ``testObjects``.
    """
    idx0 = np.searchsorted(ptsUnique,testObjects,side='left')
    idx1 = np.searchsorted(ptsUnique[:-1],testObjects,side='right')+1
    lower = F0[idx0]
    upper = F1[idx1]
    return lower,upper
|
||||
|
||||
|
||||
def ScoresToMultiProbs(calibrPoints,testObjects):
    """Return (p0, p1) for each test score given the calibration points.

    ``calibrPoints`` is an iterable of (score, label) pairs and
    ``testObjects`` a sequence of scores.  The steps are: aggregate the
    calibration points, compute F0/F1 from the cumulative sums, then look up
    the entries that apply to each test score.
    """
    # Sort the points and reduce them to distinct scores with weights.
    _yPrime,yCsd,xPrime,ptsUnique = prepareData(calibrPoints)

    # Compute the F0 and F1 functions from the cumulative sums.
    F0,F1 = computeF(xPrime,yCsd)

    # Evaluate them for the given test objects.
    return getFVal(F0,F1,ptsUnique,testObjects)
|
|
@ -0,0 +1,107 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sklearn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
from sklearn import metrics
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
|
||||
# Find example rows (the disabled snippet below prints the indices of rows matching a vote pattern)
|
||||
'''
|
||||
data=pd.read_csv("./predict1.csv")
|
||||
#data.columns=["label","PCA","MCD","IForest","LODA","LOF","KNN","CBLOF","HBOS","VAE","OCSVM"]
|
||||
print(data.head())
|
||||
for indexs in data.index:
|
||||
data1=data.loc[indexs].values[0:-1]
|
||||
data1=data.loc[indexs].tolist()
|
||||
count0=data1[0]
|
||||
count1=data1[1:].count(1)
|
||||
count2=data1[1:].count(2)
|
||||
count3=data1[1:].count(0)
|
||||
if count1==3 and count0==1 and count3==1:
|
||||
#if count3==count1 and count2>0:
|
||||
print(indexs)
|
||||
|
||||
'''
|
||||
def plot(f,t):
    """Draw ``t`` against ``f`` as a red line with small star markers and show it.

    The axis captions are fixed: "p1-p0" on x and "f1" on y.
    """
    markerStyle = dict(marker='*', ms=1, label="a")
    plt.plot(f, t, "r", **markerStyle)
    plt.xlabel("p1-p0")
    plt.ylabel("f1")
    plt.show()
|
||||
|
||||
global t
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` agree."""
    matches = 0
    for actual, predicted in zip(y, y_hat):
        matches += actual == predicted
    return matches / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives divided by the sum of the predictions.

    Assumes 0/1 labels: ``actual and predicted`` contributes the prediction
    whenever the true label is non-zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    flagged = sum(y_hat)
    return hits / flagged
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return the true-positive rate (recall): true positives over the sum of ``y``.

    Assumes 0/1 labels.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    positives = sum(y)
    return hits / positives
|
||||
def count_p(p0,p1):
    """Turn a (p0, p1) pair into a decision: 0, 1, or 2 for "abstain".

    The pair is reduced to ratio = p1 / (1 - p0 + p1).  Using the module-level
    threshold ``t``, ratios strictly inside (0.5 - t, 0.5 + 0.5*t) give 2.
    Otherwise the result is 1 when the ratio exceeds 0.5 and 0 when it does not.
    """
    ratio = p1/(1-p0+p1)

    if (0.5-t) < ratio < (0.50+0.5*t):
        return 2

    # The 0.5 cut-off can be moved to trade off along the ROC curve.
    return 1 if ratio>0.5 else 0
|
||||
|
||||
|
||||
# Sweep the abstention threshold ``t`` (read by count_p through the module
# global) and plot the F1 score of the non-abstained PCA predictions.
data=pd.read_csv("./p01.csv")
xx=[]   # thresholds that produced a usable F1
yy=[]   # the matching F1 scores

for i in range(1,50):
    t=0.005*i

    data['venn_pre']=data.apply(lambda x:count_p(x["PCA_p0"],x["PCA_p1"]),axis=1)
    print(len(data))
    data1=data[data['venn_pre']!=2]     # drop the rows on which the model abstained
    print(len(data1))
    try:
        accuracy=accuracy_score(np.array(data1['label_number']),np.array(data1['venn_pre']))
        precision=precision_score(np.array(data1['label_number']),np.array(data1['venn_pre']))
        tpr=get_tpr(np.array(data1['label_number']),np.array(data1['venn_pre']))
        f1=2 * precision * tpr / (precision + tpr)
    except ZeroDivisionError:
        continue

    # With NumPy scalars a zero denominator gives NaN (plus a RuntimeWarning)
    # rather than ZeroDivisionError, so undefined scores must be skipped
    # explicitly or they end up in the plot.
    if np.isnan(f1):
        continue

    xx.append(t)
    yy.append(f1)

    # Disabled per-threshold report:
    # print("准确率为{}".format(accuracy))
    # print("精确率为{}".format(precision))
    # print("召回率为{}".format(tpr))

    # NOTE(review): the scraped source lost its indentation; this print is
    # assumed to belong to the loop body like the disabled prints above.
    print("F1值为{}".format(f1))

plot(xx,yy)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
import os
|
||||
from sklearn.preprocessing import normalize
|
||||
|
||||
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` agree."""
    matches = 0
    for actual, predicted in zip(y, y_hat):
        matches += actual == predicted
    return matches / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives divided by the sum of the predictions.

    Assumes 0/1 labels: ``actual and predicted`` contributes the prediction
    whenever the true label is non-zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    flagged = sum(y_hat)
    return hits / flagged
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return the true-positive rate (recall): true positives over the sum of ``y``.

    Assumes 0/1 labels.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    positives = sum(y)
    return hits / positives
|
||||
|
||||
|
||||
def read_csv(filepath,testname,calname):
    """Calibrate one model's test scores with Venn-ABERS and store p0/p1.

    ``filepath + calname`` must hold ``score`` and ``label_number`` columns and
    ``filepath + testname`` a ``score`` column.  The model name is the part of
    ``calname`` before the character '校'.  The columns ``<name>_p0`` and
    ``<name>_p1`` are written to ``p01.csv`` in the current directory: added
    to the existing table when the hard-coded p01 path exists, otherwise
    written as a new two-column table.
    """
    name=calname.split('校')[0]
    p0_col=name+'_p0'
    p1_col=name+'_p1'

    test_data=pd.read_csv(filepath+testname)
    cal_data=pd.read_csv(filepath+calname)

    # (score, label) pairs for calibration; plain scores for the test objects.
    tac=list(zip(cal_data.score,cal_data.label_number))
    tec=test_data.score.tolist()

    p0,p1=VennABERS.ScoresToMultiProbs(tac,tec)
    test_data[p0_col]=p0.tolist()
    test_data[p1_col]=p1.tolist()

    # NOTE(review): existence is checked on this absolute path but the result
    # is written to 'p01.csv' relative to the working directory - confirm the
    # script is always run from that directory.
    p01="/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/p01.csv"
    if os.path.exists(p01):
        p_data=pd.read_csv(p01)
        p_data[p0_col]=test_data[p0_col]
        p_data[p1_col]=test_data[p1_col]
    else:
        p_data=pd.merge(test_data[p0_col],test_data[p1_col],left_index=True,right_index=True)
    p_data.to_csv('p01.csv',index=None)
|
||||
|
||||
|
||||
def count_p(p0,p1):
    """Turn a (p0, p1) pair into a decision: 0, 1, or 2 for "abstain".

    Returns 2 when the interval width p1 - p0 is at least 0.02.  Otherwise
    returns 1 when p1 / (1 - p0 + p1) exceeds 0.5 and 0 when it does not.
    """
    if p1-p0>=0.02:
        return 2
    # The 0.5 cut-off can be moved to trade off along the ROC curve.
    return 1 if p1/(1-p0+p1)>0.5 else 0
|
||||
|
||||
def count_re(a,b,c,d,e,f,g,h):
    """Majority vote over eight model decisions.

    Only votes equal to 0 or 1 are counted (other values, e.g. 2, are
    ignored).  Returns 1 when ones strictly outnumber zeros, else 0.
    """
    votes=(a,b,c,d,e,f,g,h)
    ones=votes.count(1)
    zeros=votes.count(0)
    return 1 if ones>zeros else 0
|
||||
|
||||
def ivap(p01,model):
    """Convert one model's p0/p1 columns into decisions and store them.

    Reads the table at ``p01``, applies count_p to ``<model>_p0`` /
    ``<model>_p1`` row by row, and writes the decisions to ``predict.csv`` in
    the current directory: as an added column when the hard-coded predict
    path exists, otherwise as a new table of ``label_number`` plus this model.
    """
    data=pd.read_csv(p01)
    data[model]=data.apply(lambda row:count_p(row[model+"_p0"],row[model+"_p1"]),axis=1)

    # NOTE(review): existence is checked on this absolute path (a different
    # dataset directory from the one used elsewhere in this script) but the
    # output goes to 'predict.csv' in the working directory - confirm.
    pre="/home/shaoleshi/民航/数据/kddcup.data/多模型协同/predict.csv"
    if os.path.exists(pre):
        p_data=pd.read_csv(pre)
        p_data[model]=data[model]
    else:
        p_data=pd.merge(data['label_number'],data[model],left_index=True,right_index=True)
    p_data.to_csv('predict.csv',index=None)
|
||||
|
||||
def result(predict):
    """Majority-vote eight model columns and print accuracy, precision and recall.

    ``predict`` is the path of a CSV holding ``label_number`` and one decision
    column per model named below.
    """
    data=pd.read_csv(predict)
    # NOTE(review): "AutoEncoder" is voted on here although the Model list in
    # this script's __main__ block does not include it - confirm the column exists.
    voters=("PCA","MCD","IForest","LODA","AutoEncoder","LOF","KNN","OCSVM")
    data['venn_pre']=data.apply(lambda row:count_re(*[row[m] for m in voters]),axis=1)

    truth=np.array(data['label_number'])
    voted=np.array(data['venn_pre'])
    accuracy=accuracy_score(truth,voted)
    precision=precision_score(truth,voted)
    tpr=get_tpr(truth,voted)

    print("准确率为{}".format(accuracy))
    print("精确率为{}".format(precision))
    print("召回率为{}".format(tpr))
|
||||
|
||||
if __name__=="__main__":
    # Step 1 only: calibrate every model's test scores into p01.csv.
    Model=["PCA","MCD","IForest","LODA","LOF","KNN","OCSVM","CBLOF","HBOS","VAE"]
    p01="/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/p01.csv"
    predict="/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/predict.csv"
    filepath='/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/score/'

    for i in Model:
        testname="{}测试集分数.csv".format(i)
        calname="{}校准集分数.csv".format(i)
        read_csv(filepath,testname,calname)

    # Disabled steps 2 and 3 (per-model decisions, then the vote):
    # for i in Model:
    #     ivap(p01,i)
    #     print(i)
    #
    # #result(predict)
|
|
@ -0,0 +1,107 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
import os
|
||||
def accuracy_score(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` agree."""
    matches = 0
    for actual, predicted in zip(y, y_hat):
        matches += actual == predicted
    return matches / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives divided by the sum of the predictions.

    Assumes 0/1 labels: ``actual and predicted`` contributes the prediction
    whenever the true label is non-zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    flagged = sum(y_hat)
    return hits / flagged
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return the true-positive rate (recall): true positives over the sum of ``y``.

    Assumes 0/1 labels.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    positives = sum(y)
    return hits / positives
|
||||
|
||||
def count_re(a,b,c,d,e,f,g,h,i,j,k):
    """Majority vote over eleven model decisions.

    Only votes equal to 0 or 1 are counted.  Returns 1 when ones strictly
    outnumber zeros, else 0.
    """
    votes=(a,b,c,d,e,f,g,h,i,j,k)
    ones=votes.count(1)
    zeros=votes.count(0)
    return 1 if ones>zeros else 0
|
||||
|
||||
def count_p(p0,p1):
    """Turn a (p0, p1) pair into a 0/1 decision.

    Returns 1 when p1 / (1 - p0 + p1) exceeds 0.18 and 0 when it does not.
    The abstention rule on p1 - p0 is disabled in this variant.
    """
    ratio=p1/(1-p0+p1)
    # The 0.18 cut-off can be moved to trade off along the ROC curve.
    return 1 if ratio>0.18 else 0
|
||||
|
||||
|
||||
def read_csv(filepath,testname,calname):
    """Calibrate one model's test scores with Venn-ABERS and store p0/p1.

    ``filepath + calname`` must hold ``score`` and ``label_number`` columns and
    ``filepath + testname`` a ``score`` column.  The model name is the part of
    ``calname`` before the character '校'.  The columns ``<name>_p0`` and
    ``<name>_p1`` are written to ``p01.csv`` in the current directory: added
    to the existing table when the hard-coded p01 path exists, otherwise
    written as a new two-column table.
    """
    name=calname.split('校')[0]
    p0_col=name+'_p0'
    p1_col=name+'_p1'

    test_data=pd.read_csv(filepath+testname)
    cal_data=pd.read_csv(filepath+calname)

    # (score, label) pairs for calibration; plain scores for the test objects.
    tac=list(zip(cal_data.score,cal_data.label_number))
    tec=test_data.score.tolist()

    p0,p1=VennABERS.ScoresToMultiProbs(tac,tec)
    test_data[p0_col]=p0.tolist()
    test_data[p1_col]=p1.tolist()

    # NOTE(review): existence is checked on this absolute path but the result
    # is written to 'p01.csv' relative to the working directory - confirm the
    # script is always run from that directory.
    p01="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/p01.csv"
    if os.path.exists(p01):
        p_data=pd.read_csv(p01)
        p_data[p0_col]=test_data[p0_col]
        p_data[p1_col]=test_data[p1_col]
    else:
        p_data=pd.merge(test_data[p0_col],test_data[p1_col],left_index=True,right_index=True)
    p_data.to_csv('p01.csv',index=None)
|
||||
|
||||
def ivap(p01,model):
    """Convert one model's p0/p1 columns into decisions and store them.

    Reads the table at ``p01``, applies count_p to ``<model>_p0`` /
    ``<model>_p1`` row by row, and writes the decisions to ``predict.csv`` in
    the current directory: as an added column when the hard-coded predict
    path exists, otherwise as a new table of ``label_number`` plus this model.
    """
    data=pd.read_csv(p01)
    data[model]=data.apply(lambda row:count_p(row[model+"_p0"],row[model+"_p1"]),axis=1)

    # NOTE(review): existence is checked on this absolute path but the output
    # goes to 'predict.csv' in the working directory - confirm they coincide.
    pre="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/predict.csv"
    if os.path.exists(pre):
        p_data=pd.read_csv(pre)
        p_data[model]=data[model]
    else:
        p_data=pd.merge(data['label_number'],data[model],left_index=True,right_index=True)
    p_data.to_csv('predict.csv',index=None)
|
||||
|
||||
def result(predict):
    """Majority-vote eleven model columns and print accuracy, precision, recall and F1.

    ``predict`` is the path of a CSV holding ``label_number`` and one decision
    column per model named below.
    """
    data=pd.read_csv(predict)
    voters=("CBLOF","HBOS","VAE","PCA","MCD","IForest","LODA","AutoEncoder","LOF","KNN","OCSVM")
    data['venn_pre']=data.apply(lambda row:count_re(*[row[m] for m in voters]),axis=1)

    truth=np.array(data['label_number'])
    voted=np.array(data['venn_pre'])
    accuracy=accuracy_score(truth,voted)
    precision=precision_score(truth,voted)
    tpr=get_tpr(truth,voted)
    f1=2 * precision * tpr / (precision + tpr)

    print(data.head())
    print(len(data['label_number']))
    print("准确率为{}".format(accuracy))
    print("精确率为{}".format(precision))
    print("召回率为{}".format(tpr))
    print("F1值为{}".format(f1))
|
||||
|
||||
|
||||
|
||||
if __name__=="__main__":
    # Steps 2 and 3: per-model decisions, then the vote over all models.
    Model=["CBLOF","HBOS","PCA","MCD","IForest","LODA","LOF","KNN","OCSVM","VAE","AutoEncoder"]
    p01="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/p01.csv"
    predict="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/predict.csv"
    filepath='/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/score/'

    # Disabled step 1 (calibrate every model's scores into p01.csv):
    # for i in Model:
    #     testname=i+'测试集分数.csv'
    #     calname=i+'校准集分数.csv'
    #     read_csv(filepath,testname,calname)

    for i in Model:
        ivap(p01,i)
        print(i)

    result(predict)
|
||||
|
||||
|
|
@ -0,0 +1,150 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sklearn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
from sklearn import metrics
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` agree."""
    matches = 0
    for actual, predicted in zip(y, y_hat):
        matches += actual == predicted
    return matches / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives divided by the sum of the predictions.

    Assumes 0/1 labels: ``actual and predicted`` contributes the prediction
    whenever the true label is non-zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    flagged = sum(y_hat)
    return hits / flagged
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return the true-positive rate (recall): true positives over the sum of ``y``.

    Assumes 0/1 labels.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    positives = sum(y)
    return hits / positives
|
||||
|
||||
def p_yuzhi(p0,p1):
    """Return the interval width p1 - p0."""
    width=p1-p0
    return width
|
||||
|
||||
def count_p(p0,p1):
    """Turn a (p0, p1) pair into a decision: 0, 1, or 2 for "abstain".

    Uses the module-level threshold ``yz`` (set by ivap).  Returns 2 when
    p1 - p0 reaches ``yz`` (within a 1e-9 tolerance) or when the ratio
    p1 / (1 - p0 + p1) lies strictly between 0.06 and 0.18.  Otherwise returns
    1 when the ratio exceeds 0.18 and 0 when it does not.
    """
    if p1-p0>=yz-0.000000001:
        return 2

    ratio=p1/(1-p0+p1)
    if 0.06<ratio<0.18:
        return 2
    # The 0.18 cut-off can be moved to trade off along the ROC curve.
    return 1 if ratio>0.18 else 0
|
||||
|
||||
|
||||
def count_re(a,b,c,d,e,f,g,h,i,j,k):
    """Majority vote over eleven model decisions, with abstention.

    Only votes equal to 0 or 1 are counted.  Returns 2 when no model voted 0
    or 1, 1 when ones strictly outnumber zeros, and 0 otherwise.
    """
    votes=(a,b,c,d,e,f,g,h,i,j,k)
    zeros=votes.count(0)
    ones=votes.count(1)
    if not zeros and not ones:
        return 2
    return 1 if ones>zeros else 0
|
||||
|
||||
def ivap(p01,model):
    """Convert one model's p0/p1 columns into decisions and store them.

    First loads this model's abstention threshold (first row of column
    ``<model>_p`` in the hard-coded threshold CSV) into the module-level
    ``yz`` that count_p reads, and prints it.  Then applies count_p row by row
    to the table at ``p01`` and writes the decisions to ``predict1.csv`` in
    the current directory: as an added column when the hard-coded predict1
    path exists, otherwise as a new table of ``label_number`` plus this model.
    """
    global yz

    p0p1="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/阈值.csv"
    yz=pd.read_csv(p0p1)[model+"_p"].tolist()[0]
    print(yz)

    data=pd.read_csv(p01)
    data[model]=data.apply(lambda row:count_p(row[model+"_p0"],row[model+"_p1"]),axis=1)

    # NOTE(review): existence is checked on this absolute path but the output
    # goes to 'predict1.csv' in the working directory - confirm they coincide.
    pre="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/predict1.csv"
    if os.path.exists(pre):
        p_data=pd.read_csv(pre)
        p_data[model]=data[model]
    else:
        p_data=pd.merge(data['label_number'],data[model],left_index=True,right_index=True)
    p_data.to_csv('predict1.csv',index=None)
|
||||
|
||||
# Derive the per-model threshold on p1 - p0
|
||||
def p1p0(p01,model):
    """Return the p1 - p0 value at a fixed quantile position for one model.

    Reads the table at ``p01``, computes p1 - p0 for every row of the given
    model, sorts the widths ascending and returns the element at index
    int(len * q).  Here q is 0.995 for each of the eleven known model names
    and 0 (i.e. the smallest width) for any other name.
    """
    data=pd.read_csv(p01)
    data[model+'_p']=data.apply(lambda x:p_yuzhi(x[model+"_p0"],x[model+"_p1"]),axis=1)
    list1=sorted(data[model+'_p'].tolist())

    # Every known model currently shares the same quantile.
    known=("PCA","MCD","IForest","LODA","LOF","KNN","OCSVM","CBLOF","HBOS","VAE","AutoEncoder")
    i=0.995 if model in known else 0
    t=list1[int(len(list1)*i)]

    # Disabled inspection of other quantiles:
    # print(list1[int(len(list1)*0.1)])
    # print(list1[int(len(list1)*0.25)])
    # print(list1[int(len(list1)*0.5)])
    # print(list1[int(len(list1)*0.75)])
    # print(list1[int(len(list1)*0.99)])
    return t
|
||||
# Result of the multi-model vote
|
||||
def result(predict):
    """Vote over eleven model columns, drop abstentions, and print the metrics.

    ``predict`` is the path of a CSV holding ``label_number`` and one decision
    column per model named below.  Rows whose vote is 2 (abstain) are removed
    before accuracy, precision, recall and F1 are computed; the row count is
    printed before and after that filter.
    """
    data=pd.read_csv(predict)
    voters=("CBLOF","HBOS","VAE","PCA","MCD","IForest","LODA","AutoEncoder","LOF","KNN","OCSVM")
    data['venn_pre']=data.apply(lambda row:count_re(*[row[m] for m in voters]),axis=1)

    print(len(data))
    data=data[data['venn_pre']!=2]
    print(len(data))

    truth=np.array(data['label_number'])
    voted=np.array(data['venn_pre'])
    accuracy=accuracy_score(truth,voted)
    precision=precision_score(truth,voted)
    tpr=get_tpr(truth,voted)
    f1=2 * precision * tpr / (precision + tpr)

    print("ivap的结果如下所示:")
    print("准确率为{}".format(accuracy))
    print("精确率为{}".format(precision))
    print("召回率为{}".format(tpr))
    print("F1值为{}".format(f1))
|
||||
#return list(fpr)[1],list(tpr1)[1]
|
||||
|
||||
|
||||
if __name__=="__main__":
    Model=["CBLOF","HBOS","PCA","MCD","IForest","LODA","LOF","KNN","OCSVM","VAE","AutoEncoder"]
    p01="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/p01.csv"
    predict1="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/predict1.csv"

    # Disabled step: derive each model's threshold and save them to 阈值.csv
    # dict_p={}
    # for i in Model:
    #     t=p1p0(p01,i)
    #     dict_p[i+'_p']=t
    # ppp=pd.DataFrame(dict_p,index=[0])
    # print(dict_p)
    # ppp.to_csv("阈值.csv",index=None)

    # Write each model's decisions, then evaluate the vote.
    for i in Model:
        ivap(p01,i)
        #result1(predict1,i)
    result(predict1)
|
||||
|
|
@ -0,0 +1,199 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
import sklearn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
from sklearn import metrics
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return the fraction of positions where ``y`` and ``y_hat`` agree."""
    matches = 0
    for actual, predicted in zip(y, y_hat):
        matches += actual == predicted
    return matches / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives divided by the sum of the predictions.

    Assumes 0/1 labels: ``actual and predicted`` contributes the prediction
    whenever the true label is non-zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    flagged = sum(y_hat)
    return hits / flagged
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return the true-positive rate (recall): true positives over the sum of ``y``.

    Assumes 0/1 labels.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    positives = sum(y)
    return hits / positives
|
||||
|
||||
|
||||
|
||||
def plot():
    """Print each model's AUC and draw all ROC curves in one figure.

    Reads ``<model>_fpr`` / ``<model>_tpr`` columns from the hard-coded ROC
    CSV.  An AUC line is printed for every model except AutoEncoder (the
    first entry), which is only plotted.
    """
    data=pd.read_csv("/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/多模型roc.csv")

    # (model name, line colour) in drawing order.
    palette=[("AutoEncoder","pink"),("HBOS","r"),("IForest","y"),("KNN","c"),
             ("LODA","m"),("LOF","g"),("MCD","b"),("OCSVM","k"),
             ("PCA","greenyellow"),("CBLOF","sienna"),("VAE","orange")]
    curves=[(name,colour,data[name+"_fpr"].tolist(),data[name+"_tpr"].tolist())
            for name,colour in palette]

    for _name,_colour,fpr,tpr in curves[1:]:
        print(metrics.auc(fpr, tpr))

    for name,colour,fpr,tpr in curves:
        plt.plot(fpr, tpr, colour, marker='*', ms=1,label=name)

    plt.legend()
    plt.title('NSL_KDD ROC')
    plt.xlabel("fpr")
    plt.ylabel("tpr")
    plt.show()
|
||||
def iqr(result,i):
    """Return the upper outlier limit Q3 + i * (Q3 - Q1) of ``result["score"]``.

    Quartiles come from np.percentile.  If that limit is NaN, the quartiles
    are instead read from the scores sorted in descending order (positions
    len/4 and 3*len/4), diagnostic values are printed, and the limit is
    recomputed from them.
    """
    quartiles=np.percentile(result["score"],[0,25,50,75,100])
    q1,q3=quartiles[1],quartiles[3]
    IQR=q3-q1
    uplimit=q3+IQR*i

    if np.isnan(uplimit):
        aa=result.score.tolist()
        aa.sort(reverse = True)
        print(type(aa))
        upper=aa[int(len(aa)/4)]
        lower=aa[int(len(aa)*3/4)]
        IQR=upper-lower
        print(upper)
        print(lower)
        print(aa[1])
        uplimit=upper+IQR*i
        print(uplimit)
        print(len(result[result.score>uplimit]))

    return uplimit
|
||||
|
||||
|
||||
def roc(test):
    """Sample 111 ROC operating points by sweeping a score threshold.

    ``test`` needs ``score`` and ``label_number`` columns.  For i = 1..111 the
    threshold is the score ranked i*100 in descending order, so the frame
    must hold more than 11100 rows.  Rows scoring strictly above the
    threshold are labelled 1, and the middle point of the resulting roc_curve
    is recorded.  A ``label_test`` column is added to ``test`` in place and
    holds the labels of the last threshold on return.

    Returns (fpr list, tpr list).
    """
    f=[]
    t=[]
    dd = test.sort_values(by='score',ascending=False)
    print(dd.head())
    ranked=list(dd.score)     # built once; the ranking does not change in the loop
    for i in range(1,112):
        limit=ranked[i*100]

        # Vectorised form of "1 if score > limit else 0".
        test['label_test']=(test.score>limit).astype(int)
        fpr,tpr1,thresholds=sklearn.metrics.roc_curve(test.label_number,
                                                      test.label_test,
                                                      pos_label=None,
                                                      sample_weight=None,
                                                      drop_intermediate=True)

        # Index 1 is the point read off for the binary prediction.
        f.append(list(fpr)[1])
        t.append(list(tpr1)[1])
    return f,t
|
||||
|
||||
def result(data,filename):
    """Label scores against an IQR limit from the training scores and print metrics.

    ``data`` needs ``score`` and ``label_number`` columns; a ``label_test``
    column is added to it in place.  ``filename`` is accepted but not used.
    The limit is iqr(train, 1.3) over the hard-coded AutoEncoder training
    score file.
    """
    # NOTE(review): the training file is always the AutoEncoder one, whatever
    # model ``data`` comes from - confirm this is intended.
    train=pd.read_csv("/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/AutoEncoder训练集.csv")
    limit=iqr(train,1.3)

    data['label_test']=data.score.apply(lambda x: 1 if x>limit else 0)
    truth=np.array(data.label_number)
    flagged=np.array(data.label_test)
    accuracy=accuracy_score(truth,flagged)
    precision=precision_score(truth,flagged)
    tpr=get_tpr(truth,flagged)
    print("准确率为{}".format(accuracy))
    print("精确率为{}".format(precision))
    print("召回率为{}".format(tpr))
|
||||
def read_csv(filepath,filename):
    """Load the CSV at ``filepath + filename`` and pass it to result()."""
    frame=pd.read_csv(filepath+filename)
    result(frame,filename)
|
||||
if __name__=='__main__':
    Model=["CBLOF","HBOS","PCA","MCD","IForest","LODA","LOF","KNN","OCSVM","VAE","AutoEncoder"]

    # Record every model's ROC samples as <model>_fpr / <model>_tpr columns.
    # (Named roc_columns rather than ``dict`` so the builtin is not shadowed
    # for the rest of the module.)
    roc_columns={}
    for i in Model:
        data=pd.read_csv("/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/score/"+i+"测试集分数.csv")
        f,t=roc(data)
        roc_columns[i+"_fpr"]=f
        roc_columns[i+"_tpr"]=t
    data=pd.DataFrame(roc_columns)
    # NOTE(review): written to the working directory, while plot() reads the
    # file from an absolute path - confirm they are the same location.
    data.to_csv("多模型roc.csv",index=None)

    # Disabled step: accuracy / recall for a single model
    # filepath="/home/shaoleshi/民航/数据/kddcup.data/多模型协同/score/"
    # filename="AutoEncoder测试集分数.csv"
    # read_csv(filepath,filename)

    plot()  # combined figure
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,156 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sklearn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.preprocessing import normalize
|
||||
'''
|
||||
author:leshi
|
||||
|
||||
'''
|
||||
from pyod.models.auto_encoder import AutoEncoder
|
||||
from pyod.models.lscp import LSCP
|
||||
from pyod.models.feature_bagging import FeatureBagging
|
||||
from pyod.models.pca import PCA
|
||||
from pyod.models.hbos import HBOS
|
||||
from pyod.models.iforest import IForest
|
||||
from pyod.models.cof import COF
|
||||
from pyod.models.mcd import MCD
|
||||
from pyod.models.lof import LOF
|
||||
from pyod.models.knn import KNN
|
||||
from pyod.models.cblof import CBLOF
|
||||
from pyod.models.loda import LODA
|
||||
from pyod.models.ocsvm import OCSVM
|
||||
from pyod.models.abod import ABOD
|
||||
from pyod.models.vae import VAE
|
||||
from pyod.models.so_gaal import SO_GAAL
|
||||
from pyod.models.mo_gaal import MO_GAAL
|
||||
from pyod.models.loci import LOCI
|
||||
|
||||
# One pre-configured detector per pyod model.  model_train() looks these up
# by the name "<model>_clf", so the variable names must match Model_list.
COF_clf = COF(contamination=0.01, n_neighbors=30)
LSCP_clf = LSCP(contamination=0.01, detector_list=[LOF(), PCA()])
LOCI_clf = LOCI(contamination=0.05)

VAE_clf = VAE(contamination=0.001, epochs=50, gamma=0.8, capacity=0.2,
              encoder_neurons=[9, 4], decoder_neurons=[4, 9])
ABOD_clf = ABOD(contamination=0.01, n_neighbors=20, method='default')
FeatureBagging_clf = FeatureBagging(contamination=0.01)
AutoEncoder_clf = AutoEncoder(contamination=0.001)
OCSVM_clf = OCSVM(contamination=0.001)
LODA_clf = LODA(contamination=0.001)
CBLOF_clf = CBLOF(contamination=0.001)
LOF_clf = LOF(contamination=0.001)
PCA_clf = PCA(contamination=0.001)
HBOS_clf = HBOS(contamination=0.001)
IForest_clf = IForest(contamination=0.001)
MCD_clf = MCD(contamination=0.001)
KNN_clf = KNN(contamination=0.001)
SO_GAAL_clf = SO_GAAL(contamination=0.001)

MO_GAAL_clf = MO_GAAL(contamination=0.05, stop_epochs=2)  # parameters still need tuning

# Directory holding the data splits, and the models trained by __main__.
Path="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/"
Model_list=["CBLOF","HBOS","PCA","MCD","IForest","LODA","LOF","KNN","OCSVM","VAE","AutoEncoder"]
#Model_list=["VAE"]
|
||||
def read_csv(filepath,filename):
    """Split the raw files into training, calibration, validation and test CSVs.

    ``filepath + filename`` is the header-less training file and
    ``filepath + "KDDTest+.csv"`` the header-less test file.

    * The training file is shuffled; the first 40% feeds the training set
      (20000 "normal" rows) and the rest the calibration set (15000 "normal"
      plus 15000 other rows).
    * The test file is shuffled and halved into validation and test sets.

    The four sets are written under the module-level ``Path``.
    """
    total_data=pd.read_csv(filepath+filename,header=None)
    # NOTE(review): "dst_host_same_src_port_rate:" carries a trailing colon;
    # it is kept because the name ends up in the written CSV headers.
    total_data.columns=["duration","protocol_type","service","flag","src_bytes","dst_bytes","land",
    "wrong_fragment","urgent","hot","num_failed_logins","logged_in","num_compromised","root_shell",
    "su_attempted","num_root","num_file_creations","num_shells","num_access_files","num_outbound_cmds","is_host_login"
    ,"is_guest_login","count","srv_count","serror_rate","srv_serror_rate","rerror_rate","srv_rerror_rate"
    ,"same_srv_rate","diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count","dst_host_same_srv_rate","dst_host_diff_srv_rate"
    ,"dst_host_same_src_port_rate:","dst_host_srv_diff_host_rate","dst_host_serror_rate"
    ,"dst_host_srv_serror_rate","dst_host_rerror_rate","dst_host_srv_rerror_rate","label","biao"]

    # Positional slicing replaces np.split on a DataFrame (same split point).
    shuffled=total_data.sample(frac=1)
    cut=int(.4*len(total_data))
    train1=shuffled.iloc[:cut]
    cal=shuffled.iloc[cut:]

    filename2="KDDTest+.csv"
    test_data=pd.read_csv(filepath+filename2,header=None)

    train=train1[train1["label"]=="normal"].sample(n=20000,random_state=1)
    cal1=cal[cal["label"]=="normal"].sample(n=15000,random_state=1)
    cal2=cal[cal["label"]!="normal"].sample(n=15000,random_state=10)
    # DataFrame.append was removed in pandas 2.0; concat yields the same rows.
    cal=pd.concat([cal1,cal2])

    test_data.columns=total_data.columns
    shuffled_test=test_data.sample(frac=1)
    half=int(.5*len(test_data))
    val=shuffled_test.iloc[:half]
    test1=shuffled_test.iloc[half:]

    val.to_csv(Path+"验证集.csv",index=None)
    test1.to_csv(Path+"测试集.csv",index=None)
    train.to_csv(Path+"训练集.csv",index=None)
    cal.to_csv(Path+"校准集.csv",index=None)
|
||||
|
||||
def count_data():
    """Print how many normal / abnormal rows each saved data split contains.

    Prints the validation-set counts first (after the "验证" caption), then
    the combined normal and abnormal totals over the test, training and
    calibration sets.
    """
    Path="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/"
    sample_ab=pd.read_csv(Path+"测试集.csv")
    sample_nor=pd.read_csv(Path+"训练集.csv")
    sample_cal=pd.read_csv(Path+"校准集.csv")
    sample_val=pd.read_csv(Path+"验证集.csv")

    # The validation frame is filtered with its own labels; the original used
    # the test set's label column as the mask.
    nor_va=len(sample_val[sample_val["label"]=="normal"])
    ab_va=len(sample_val[sample_val["label"]!="normal"])
    print("验证")
    print(nor_va)
    print(ab_va)

    nor=0
    abn=0
    for frame in (sample_ab,sample_nor,sample_cal):
        nor=nor+len(frame[frame["label"]=="normal"])
        abn=abn+len(frame[frame["label"]!="normal"])

    print(nor)
    print(abn)
|
||||
|
||||
# Train a model and write the score files for the calibration, test and validation sets
|
||||
def model_train(model):
    """Fit the detector named ``model`` and score every data split with it.

    The detector is the module-level object called ``<model>_clf``.  It is
    fitted on the row-normalised training features, the training scores are
    saved, and the fitted detector is then passed to model_cal, model_test
    and model_val.
    """
    total_data=pd.read_csv(Path+"训练集.csv")
    # Drop the columns that are not used as features.
    x_train=total_data.drop(columns=["protocol_type","service","flag","src_bytes","label","biao"])
    x_train = pd.DataFrame(normalize(x_train.values), index=x_train.index, columns=x_train.columns)

    # Plain name lookup instead of eval(): same object, no code evaluation.
    clf=globals()[model+"_clf"].fit(x_train)
    total_data['score']=clf.decision_scores_
    # NOTE(review): written to the working directory, unlike the other score
    # files, which go under Path - confirm the intended location.
    total_data.to_csv(model+"训练集.csv",index=None)

    model_cal(clf,model)
    model_test(clf,model)
    model_val(clf,model)
|
||||
def model_cal(clf,model):
    """Score the calibration split with a fitted detector and save the result.

    Reads ``校准集.csv`` from ``Path``, scores the normalised feature columns
    with ``clf.decision_function``, adds ``label_number`` (0 for "normal",
    1 otherwise) and ``score`` columns, and writes the frame to
    ``score/<model>校准集分数.csv`` under ``Path``.
    """
    frame = pd.read_csv(Path + "校准集.csv")

    non_features = ["protocol_type", "service", "flag", "src_bytes", "label", "biao"]
    features = frame.drop(columns=non_features)
    features = pd.DataFrame(
        normalize(features.values), index=features.index, columns=features.columns
    )
    scores = clf.decision_function(features)

    frame['label_number'] = [0 if name == "normal" else 1 for name in frame["label"]]
    frame['score'] = scores

    # Record the calibration-set scores.
    frame.to_csv(Path + "score/" + model + "校准集分数.csv", index=None)
|
||||
def model_test(clf,model):
    """Score the test split with a fitted detector and save the result.

    Reads ``测试集.csv`` from ``Path``, scores the normalised feature columns
    with ``clf.decision_function``, adds ``label_number`` (0 for "normal",
    1 otherwise) and ``score`` columns, and writes the frame to
    ``score/<model>测试集分数.csv`` under ``Path``.
    """
    frame = pd.read_csv(Path + "测试集.csv")

    non_features = ["protocol_type", "service", "flag", "src_bytes", "label", "biao"]
    features = frame.drop(columns=non_features)
    features = pd.DataFrame(
        normalize(features.values), index=features.index, columns=features.columns
    )
    scores = clf.decision_function(features)

    frame['label_number'] = [0 if name == "normal" else 1 for name in frame["label"]]
    frame['score'] = scores

    # Record the test-set scores.
    frame.to_csv(Path + "score/" + model + "测试集分数.csv", index=None)
|
||||
|
||||
def model_val(clf,model):
    """Score the validation split with a fitted detector and save the result.

    Reads ``验证集.csv`` from ``Path``, scores the normalised feature columns
    with ``clf.decision_function``, adds ``label_number`` (0 for "normal",
    1 otherwise) and ``score`` columns, and writes the frame to
    ``score/<model>验证集分数.csv`` under ``Path``.
    """
    frame = pd.read_csv(Path + "验证集.csv")

    non_features = ["protocol_type", "service", "flag", "src_bytes", "label", "biao"]
    features = frame.drop(columns=non_features)
    features = pd.DataFrame(
        normalize(features.values), index=features.index, columns=features.columns
    )
    scores = clf.decision_function(features)

    frame['label_number'] = [0 if name == "normal" else 1 for name in frame["label"]]
    frame['score'] = scores

    # Record the validation-set scores.
    frame.to_csv(Path + "score/" + model + "验证集分数.csv", index=None)
|
||||
if __name__=="__main__":
    # One-off preparation step, kept for reference (disabled):
    #   filepath = '/home/shaoleshi/毕设/NSL_KDD-master/'
    #   filename = 'KDDTrain+.csv'
    #   read_csv(filepath, filename)  # extract training / test / calibration sets
    #   count_data()

    # Train every configured model and write its score files.
    for i in Model_list:
        model_train(i)
        # The old call passed the template and the name as two print()
        # arguments, so the placeholder was never filled in.
        print("已完成{}".format(i))
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sklearn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
from sklearn import metrics
|
||||
import os
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return the fraction of paired entries of ``y`` and ``y_hat`` that are equal.

    Pairs are formed with ``zip``; the denominator is always ``len(y)``.
    """
    matches = 0
    for actual, predicted in zip(y, y_hat):
        matches += actual == predicted
    return matches / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives divided by the sum of ``y_hat``.

    Entries are combined with ``and``, so the result is only a precision
    when both inputs hold 0/1 (or boolean) labels. The ratio is undefined
    when ``y_hat`` sums to zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    flagged = sum(y_hat)
    return hits / flagged
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return true positives divided by the sum of ``y`` (the true-positive rate).

    Entries are combined with ``and``, so 0/1 (or boolean) labels are
    expected. The ratio is undefined when ``y`` sums to zero.
    """
    hits = 0
    for actual, predicted in zip(y, y_hat):
        hits += actual and predicted
    positives = sum(y)
    return hits / positives
|
||||
|
||||
def result(model,predict):
    """Print accuracy, precision, recall and F1 for one model's predictions.

    Args:
        model: Name of the prediction column to evaluate.
        predict: Path of a CSV holding ``label_number`` and the ``model`` column.

    Rows whose prediction equals 2 are left out before scoring (2 looks like
    a "no decision" code - confirm against whatever writes the file). The
    number of rows that were scored is printed last.
    """
    frame = pd.read_csv(predict)
    scored = frame[frame[model] != 2]

    accuracy = accuracy_score(np.array(scored['label_number']), np.array(scored[model]))
    precision = precision_score(np.array(scored['label_number']), np.array(scored[model]))
    tpr = get_tpr(np.array(scored['label_number']), np.array(scored[model]))
    f1 = 2 * precision * tpr / (precision + tpr)

    print(model + "的结果如下所示:")
    report = (
        ("准确率为{}", accuracy),
        ("精确率为{}", precision),
        ("召回率为{}", tpr),
        ("F1值为{}", f1),
        ("预测个数值为{}", len(scored)),
    )
    for template, value in report:
        print(template.format(value))
|
||||
|
||||
if __name__=="__main__":
    # Model names; only needed by the disabled loop noted below.
    Model = [
        "PCA", "MCD", "IForest", "LODA", "LOF",
        "KNN", "OCSVM", "CBLOF", "HBOS", "VAE",
    ]
    predict = "/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/predict1.csv"

    # Report a single model. To report all of them instead:
    #   for i in Model:
    #       result(i, predict)
    result("VAE", predict)
|
|
@ -0,0 +1,60 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
import os
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return the share of positions at which ``y`` and ``y_hat`` hold equal values.

    Positions are paired with ``zip`` and the count is divided by ``len(y)``.
    """
    agreements = [truth == guess for truth, guess in zip(y, y_hat)]
    return sum(agreements) / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return the number of true positives divided by the sum of ``y_hat``.

    Labels are combined with ``and``, so both inputs are expected to be
    0/1 (or boolean). Undefined when ``y_hat`` sums to zero.
    """
    joint = [truth and guess for truth, guess in zip(y, y_hat)]
    true_positive = sum(joint)
    return true_positive / sum(y_hat)
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return the number of true positives divided by the sum of ``y``.

    Labels are combined with ``and``, so both inputs are expected to be
    0/1 (or boolean). Undefined when ``y`` sums to zero.
    """
    joint = [truth and guess for truth, guess in zip(y, y_hat)]
    true_positive = sum(joint)
    return true_positive / sum(y)
|
||||
|
||||
|
||||
|
||||
def handle(test, j, score_dir="/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/score/"):
    """Pick the score threshold with the best F1 on ``test``, then report test metrics.

    Candidate thresholds are the scores found at ranks 100, 200, ... of the
    descending score order (at most 111 candidates). The candidate with the
    highest F1 on ``test`` is applied to ``<score_dir><j>测试集分数.csv`` and
    accuracy, precision, recall and F1 on that file are printed.

    Args:
        test: DataFrame with ``score`` and ``label_number`` columns used for
            tuning. A ``label_test`` column is written into it as a side
            effect.
        j: Model name; selects the test-score file and labels the report.
        score_dir: Directory prefix (ending with a separator) of the
            per-model score files. Defaults to the original project path.
    """
    ranked = test.sort_values(by='score', ascending=False)
    print(ranked.head())

    # Build the ranked score list and label array once instead of on every
    # iteration.
    ranked_scores = ranked['score'].tolist()
    truth = np.array(test.label_number)

    best_f1 = 0
    best_limit = 0  # stays 0 when no candidate achieves a positive F1

    # Same 100-row stride and 111-step cap as before, but clipped to the data
    # so inputs with fewer than 11101 rows no longer raise IndexError.
    last_step = min(111, (len(ranked_scores) - 1) // 100)
    for step in range(1, last_step + 1):
        limit = ranked_scores[step * 100]
        test['label_test'] = (test.score > limit).astype(int)
        predicted = np.array(test.label_test)
        precision = precision_score(truth, predicted)
        tpr = get_tpr(truth, predicted)
        f1 = 2 * precision * tpr / (precision + tpr)
        if best_f1 < f1:
            best_f1 = f1
            best_limit = limit

    data = pd.read_csv(score_dir + j + "测试集分数.csv")
    data['label_test'] = (data.score > best_limit).astype(int)

    actual = np.array(data.label_number)
    flagged = np.array(data.label_test)
    accuracy = accuracy_score(actual, flagged)
    precision = precision_score(actual, flagged)
    tpr = get_tpr(actual, flagged)
    f1 = 2 * precision * tpr / (precision + tpr)

    print(j + "的结果如下所示:")
    print("准确率为{}".format(accuracy))
    print("精确率为{}".format(precision))
    print("召回率为{}".format(tpr))
    print("F1值为{}".format(f1))
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__=='__main__':
    Model = [
        "PCA", "MCD", "IForest", "LODA", "LOF", "KNN",
        "CBLOF", "HBOS", "VAE", "OCSVM", "AutoEncoder",
    ]
    #Model=["CBLOF"]

    # Tune each model's threshold on its validation scores and report it.
    for i in Model:
        data = pd.read_csv(
            "/home/shaoleshi/毕设/NSL_KDD-master/多模型协同/score/" + i + "验证集分数.csv"
        )
        handle(data, i)
|
||||
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
|
||||
def read_csv(filepath,testname,trainname):
    """Attach VennABERS p0/p1 columns to the scored test file and save it.

    Args:
        filepath: Directory prefix concatenated directly with the file names.
        testname: CSV with ``score`` and ``label_number`` columns.
        trainname: CSV with a ``label`` column.

    Side effects: writes ``训练.csv`` (the training frame plus a ``label_01``
    column, index included) and ``p01.csv`` (the test frame plus ``new_col``,
    ``p0`` and ``p1``, index included) to the current working directory, and
    prints the tenth ``p1`` value - so fewer than ten test rows raises
    IndexError.
    """
    train = pd.read_csv(filepath + trainname)
    # Note the trailing dot: only the exact label "normal." maps to 0.
    train['label_01'] = [0 if name == "normal." else 1 for name in train.label]
    train.to_csv("训练.csv")

    test = pd.read_csv(filepath + testname)

    # NOTE(review): the (score, label) pairs passed as the first argument and
    # the scores passed as the second both come from the test file.
    test['new_col'] = list(zip(test.score, test.label_number))
    tac = test.new_col.tolist()
    tec = test.score.tolist()

    p0, p1 = VennABERS.ScoresToMultiProbs(tac, tec)
    p1_values = p1.tolist()
    print(p1_values[9])

    test['p0'] = p0.tolist()
    test['p1'] = p1_values
    test.to_csv('p01.csv')
|
||||
|
||||
if __name__=='__main__':
    # Input directory and the two CSV names handed to read_csv().
    filepath = '/home/shaoleshi/民航/数据/NUSW-NB15/'
    testname = '测试.csv'
    trainname = '训练.csv'

    read_csv(filepath, testname, trainname)
|
||||
|
|
@ -0,0 +1,221 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import sklearn
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import VennABERS
|
||||
from sklearn import metrics
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import os
|
||||
|
||||
global yz
|
||||
|
||||
def plot(f,t):
    """Draw ``t`` against ``f`` as a red starred line and show the figure.

    The x axis is labelled "fpr" and the y axis "tpr".
    """
    line_style = {"marker": '*', "ms": 1, "label": "a"}
    plt.plot(f, t, "r", **line_style)
    plt.xlabel("fpr")
    plt.ylabel("tpr")
    plt.show()
|
||||
|
||||
def accuracy_score(y, y_hat):
    """Return how often ``y`` and ``y_hat`` agree, as a fraction of ``len(y)``.

    Elements are compared pairwise in ``zip`` order.
    """
    correct = 0
    for expected, got in zip(y, y_hat):
        correct += expected == got
    return correct / len(y)
|
||||
|
||||
def precision_score(y, y_hat):
    """Return true positives over the sum of ``y_hat``.

    Each pair contributes ``yi and yi_hat`` to the numerator, so values other
    than 0/1 in either input distort the result. Undefined when ``y_hat``
    sums to zero.
    """
    true_positive = 0
    for expected, got in zip(y, y_hat):
        true_positive += expected and got
    predicted_positive = sum(y_hat)
    return true_positive / predicted_positive
|
||||
|
||||
def get_tpr(y, y_hat):
    """Return true positives over the sum of ``y`` (true-positive rate).

    Each pair contributes ``yi and yi_hat`` to the numerator, so values other
    than 0/1 in either input distort the result. Undefined when ``y`` sums
    to zero.
    """
    true_positive = 0
    for expected, got in zip(y, y_hat):
        true_positive += expected and got
    actual_positive = sum(y)
    return true_positive / actual_positive
|
||||
|
||||
|
||||
|
||||
def count_p(p0,p1):
    """Turn a (p0, p1) probability pair into a decision code.

    The pair is merged into the single value ``p1 / (1 - p0 + p1)``.

    Returns:
        2 when the merged value lies strictly between 0.25 and 0.60;
        otherwise 1 when it exceeds 0.5, else 0. Because the 2-band is
        tested first, 1 is only reached for values of 0.60 and above.
    """
    merged = p1 / (1 - p0 + p1)

    if 0.25 < merged < 0.60:
        return 2

    # The 0.5 cut-off can be moved to tune the ROC operating point.
    return 1 if merged > 0.5 else 0
|
||||
|
||||
|
||||
def count_re(a,b,c,d,e,f,g,h,i,j):
    """Combine ten per-model decision codes by majority vote.

    Only the codes 0 and 1 count as votes. Returns 2 when none of the ten
    arguments is 0 or 1, 1 when there are strictly more 1s than 0s, and 0
    otherwise (ties go to 0).
    """
    votes = [a, b, c, d, e, f, g, h, i, j]
    zeros = votes.count(0)
    ones = votes.count(1)

    if not (zeros or ones):
        return 2
    return 1 if ones > zeros else 0
|
||||
|
||||
|
||||
def p_yuzhi(p0,p1):
    """Return the gap between the two probabilities, ``p1 - p0``."""
    gap = p1 - p0
    return gap
|
||||
|
||||
def ivap(p01,model):
    """Compute one model's decision codes and store them in the prediction file.

    Each row of the CSV at ``p01`` is classified with ``count_p`` from its
    ``<model>_p0`` / ``<model>_p1`` columns. The resulting column, named
    after the model, is added to the shared ``predict1.csv``; if that file
    does not exist yet it is created with ``label_number`` plus the new
    column.

    Args:
        p01: Path of the CSV holding ``label_number`` and the per-model
            ``_p0`` / ``_p1`` columns.
        model: Model name used as column prefix and as output column name.
    """
    data = pd.read_csv(p01)
    data[model] = data.apply(
        lambda row: count_p(row[model + "_p0"], row[model + "_p1"]), axis=1
    )

    pre = "/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/predict1.csv"
    if os.path.exists(pre):
        p_data = pd.read_csv(pre)
        p_data[model] = data[model]
    else:
        p_data = data[['label_number', model]]

    # Write back to the same file that was checked and read above. The old
    # code wrote to a relative 'predict1.csv', so the accumulated columns
    # were lost whenever the working directory was a different folder.
    p_data.to_csv(pre, index=None)
|
||||
|
||||
def result(predict):
    """Print metrics for the majority vote over the ten per-model columns.

    The CSV at ``predict`` must contain ``label_number`` and one column per
    model listed below. Each row's codes are merged with ``count_re`` into
    ``venn_pre``; rows where the vote is 2 are dropped before scoring. The
    row count is printed before and after that filter, followed by
    accuracy, precision, recall and F1.
    """
    frame = pd.read_csv(predict)

    voters = ["PCA", "MCD", "IForest", "LODA", "LOF", "KNN", "OCSVM", "CBLOF", "HBOS", "VAE"]
    frame['venn_pre'] = frame.apply(
        lambda row: count_re(*[row[name] for name in voters]), axis=1
    )

    print(len(frame))
    decided = frame[frame['venn_pre'] != 2]
    print(len(decided))

    accuracy = accuracy_score(np.array(decided['label_number']), np.array(decided['venn_pre']))
    precision = precision_score(np.array(decided['label_number']), np.array(decided['venn_pre']))
    tpr = get_tpr(np.array(decided['label_number']), np.array(decided['venn_pre']))
    f1 = 2 * precision * tpr / (precision + tpr)

    print("ivap的结果如下所示:")
    for template, value in (
        ("准确率为{}", accuracy),
        ("精确率为{}", precision),
        ("召回率为{}", tpr),
        ("F1值为{}", f1),
    ):
        print(template.format(value))
|
||||
|
||||
|
||||
#处理p1-p0阈值的问题
|
||||
def p1p0(p01,model):
    """Return a model-specific quantile of the differences ``p1 - p0``.

    The per-row difference between ``<model>_p1`` and ``<model>_p0`` is
    computed from the CSV at ``p01`` and sorted ascending; the value at
    position ``int(n * q)`` is returned, where ``q`` is the fraction
    configured for the model below. Unknown model names use ``q = 0``,
    which yields the smallest difference.

    Args:
        p01: Path of the CSV with the per-model ``_p0`` / ``_p1`` columns.
        model: Model name used as the column prefix.

    Raises:
        IndexError: If the CSV has no rows.
    """
    quantile_by_model = {
        "PCA": 0.89,
        "MCD": 0.95,
        "IForest": 0.99,
        "LODA": 0.93,
        "LOF": 0.7,
        "KNN": 0.7,
        "OCSVM": 0.7,
        "CBLOF": 0.6,
        "HBOS": 0.6,
        "VAE": 0.6,
    }

    data = pd.read_csv(p01)
    # One vectorised subtraction replaces the former row-by-row apply().
    gaps = sorted((data[model + "_p1"] - data[model + "_p0"]).tolist())

    fraction = quantile_by_model.get(model, 0)
    return gaps[int(len(gaps) * fraction)]
|
||||
|
||||
|
||||
|
||||
# Metrics for a single model's prediction column (rows coded 2 are not filtered out)
|
||||
def result1(predict,model):
    """Print accuracy, precision, recall and F1 for one model's column.

    Args:
        predict: Path of a CSV holding ``label_number`` and the ``model`` column.
        model: Name of the prediction column to evaluate.

    Unlike the voting report, no rows are removed first: the filter on code 2
    is disabled here, so such rows take part in every metric.
    """
    frame = pd.read_csv(predict)
    print("模型{}结果为".format(model))

    accuracy = accuracy_score(np.array(frame['label_number']), np.array(frame[model]))
    precision = precision_score(np.array(frame['label_number']), np.array(frame[model]))
    tpr = get_tpr(np.array(frame['label_number']), np.array(frame[model]))
    f1 = 2 * precision * tpr / (precision + tpr)

    for template, value in (
        ("准确率为{}", accuracy),
        ("精确率为{}", precision),
        ("召回率为{}", tpr),
        ("F1值为{}", f1),
    ):
        print(template.format(value))
|
||||
|
||||
|
||||
if __name__=="__main__":
    f = []
    tt = []

    Model = [
        "PCA", "MCD", "IForest", "LODA", "LOF",
        "KNN", "OCSVM", "CBLOF", "HBOS", "VAE",
    ]
    p01 = "/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/p01.csv"
    predict = "/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/predict.csv"
    filepath = '/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/score/'
    predict1 = "/home/shaoleshi/民航/数据/NSL_KDD-master/NSL_KDD-master/多模型协同/predict1.csv"

    # Disabled step: derive the per-model p1-p0 thresholds and save them.
    #   dict_p = {}
    #   for i in Model:
    #       t = p1p0(p01, i)
    #       dict_p[i + '_p'] = t
    #   ppp = pd.DataFrame(dict_p, index=[0])
    #   print(dict_p)
    #   ppp.to_csv("阈值.csv", index=None)
    #
    # Disabled single-model checks:
    #   ivap(p01, "IForest")  # 0.85
    #   result1(predict1, "IForest")
    #   ivap(p01, "PCA")      # 0.6
    #   result1(predict1, "PCA")
    #   ivap(p01, "LOF")      # 0.7
    #   result1(predict1, "LOF")

    # Write every model's decision column into the prediction file ...
    for i in Model:
        ivap(p01, i)
        #result1(predict1,i)

    # ... then report the metrics of the combined vote.
    result(predict1)
|
||||
|
||||
|
Loading…
Reference in New Issue