import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift,estimate_bandwidth
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn import metrics
import warnings
warnings.filterwarnings('ignore')
'''
Data from: http://archive.ics.uci.edu/ml/datasets/Dishonest+Internet+users+Dataset
'''
# Read the data: every line of 'clustering.txt' becomes one row made of its
# space-separated fields.
dataset = []
with open('clustering.txt', 'r') as f:
    # Iterate the file object directly; there is no need to materialise the
    # whole file as a list with readlines() first.
    for line in f:
        # Remove only the trailing newline. Slicing off the last character
        # unconditionally would truncate the final field whenever the file
        # does not end with a newline.
        attrs = line.rstrip('\n').split(' ')
        dataset.append(attrs)
# Convert the collected rows into a NumPy array for column-wise processing.
dataset = np.array(dataset)
# Preprocessing
for index,value in enumerate(dataset[0]):
if value.isdigit():
dataset[:,in