kdd.py

zhangbohan

Uploaded on: March 26, 2021, 2:23 a.m.
.python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matrixprofile as mp
import os
from matrixprofile import *

# Read every file in the data directory
def read_csv_file(data_dir='PhaseI/'):
    """Load each file in *data_dir* as a single-column series.

    Args:
        data_dir: Directory holding the series files. Defaults to
            ``'PhaseI/'``, the location this script originally hard-coded.

    Returns:
        A ``(yseries, names)`` tuple. ``yseries`` is a list with one NumPy
        array per file (the file's only column) and ``names`` is the list of
        the matching file names, in the same order.
    """
    # os.listdir returns entries in arbitrary order; sort so the returned
    # series line up with the same files on every run and platform.
    files = sorted(os.listdir(data_dir))
    print(files)

    yseries = []
    names = []
    for name in files:
        csv_path = os.path.join(data_dir, name)
        # A sub-directory cannot be parsed by read_csv; skip it instead of
        # aborting the whole load.
        if not os.path.isfile(csv_path):
            continue
        print(name)
        # Passing names= makes pandas treat the first line as data rather
        # than as a header row.
        df = pd.read_csv(csv_path, names=['values'])
        values = df['values'].to_numpy()
        print(values.shape)
        yseries.append(values)
        names.append(name)
    return yseries, names

# Load every series; the second return value (the file names) is not used here.
values,_=read_csv_file()

def compute_an(values, window_size=100):
    """Collect the discords reported by matrixprofile for each series.

    Args:
        values: Iterable of series; each one is passed to ``mp.compute``.
        window_size: Window argument handed to ``mp.compute`` for every
            series.

    Returns:
        A list with one entry per input series, holding the ``'discords'``
        item of ``mp.discover.discords`` for that series' profile
        (presumably the discord positions -- confirm against the
        matrixprofile documentation).
    """
    return [
        mp.discover.discords(mp.compute(series, window_size))['discords']
        for series in values
    ]
# Discords found for every loaded series.
res = compute_an(values)
print(res)

# Keep one representative discord per series: the middle element of its
# sorted discords.
res_avg = []
for discords in res:
    ordered = np.sort(discords)
    # Indexing [1] directly raises IndexError when fewer than two discords
    # were found. The middle element is the same value for two or three
    # discords and still works for a single one; None marks a series with no
    # discords at all.
    res_avg.append(ordered[len(ordered) // 2] if len(ordered) else None)
print(res_avg)


Posted: March 26, 2021, 2:26 a.m.

zhangbohan

Use res_avg to compute the median of the outliers. The result is simply res_avg, but when the outliers aren't larger than the label, we choose another number.