DBSCAN聚类算法-SQL实现

 

-- Point table for the DBSCAN run (session-local temp table).
-- The columns the clustering loop reads are NOT NULL: a NULL id would end the
-- driver loop early and a NULL flag would never match the "c = 0" seed filter.
CREATE TABLE #T
(
	p VARCHAR(10)   NOT NULL,            -- point identifier, used to look rows up
	x DECIMAL(18,6) NOT NULL,            -- x coordinate
	y DECIMAL(18,6) NOT NULL,            -- y coordinate
	j VARCHAR(10)   NULL,                -- cluster label ('C1', 'C2', ...); stays NULL until assigned
	c BIT           NOT NULL DEFAULT 0   -- 1 once the point has been processed
)

-- Sample data: 13 two-dimensional points.
-- The table name is written exactly as created (#T), so the script also runs
-- where tempdb uses a case-sensitive collation. The flag column c is left to
-- its default of 0 (unprocessed) and the label column j stays NULL.
INSERT INTO #T (p, x, y)
VALUES
	('p1',  1,  2),
	('p2',  2,  1),
	('p3',  2,  4),
	('p4',  4,  3),
	('p5',  5,  8),
	('p6',  6,  7),
	('p7',  6,  9),
	('p8',  7,  9),
	('p9',  9,  5),
	('p10', 1, 12),
	('p11', 3, 12),
	('p12', 5, 12),
	('p13', 3,  3)
 

-- #M: empty work table with the same columns as #T. The clustering loop uses
-- it to hold the members of the cluster currently being grown.
SELECT TOP (0) p, x, y, j, c INTO #M FROM #T


-- Algorithm parameters.
DECLARE @eps DECIMAL(18,6) = 3   -- neighbourhood radius; DECIMAL so fractional radii (e.g. 1.5) are not truncated
DECLARE @MinPts INT = 3          -- minimum neighbourhood size for a cluster to be started

-- Loop state.
DECLARE @cnt INT                 -- size of the neighbourhood being examined
DECLARE @i INT = 0               -- number of clusters created so far (used in the label)
DECLARE @p VARCHAR(10)           -- id of the point being processed
DECLARE @x DECIMAL(18,6)         -- its x coordinate
DECLARE @y DECIMAL(18,6)         -- its y coordinate


-- DBSCAN main loop.
--
-- Reads : #T (points), @eps (radius), @MinPts (density threshold).
-- Writes: #T.c (1 = processed), #T.j (cluster label 'C<n>'), @i (cluster count).
-- Uses  : #M as the member list / work queue of the cluster being grown;
--         #M.c = 1 means "this member's neighbourhood has been examined".
--
-- Behaviour:
--   * A point starts a cluster only if its eps-neighbourhood (itself included,
--     counted over ALL points) holds at least @MinPts points.
--   * Only members that satisfy the same density test pull further points into
--     the cluster; sparse (border) members are labelled but do not extend it.
--   * Any point without a label can join a cluster, including points that were
--     examined earlier and found too sparse to start one.
--   * Points that already carry a label are never relabelled.
--   * Seeds are taken with TOP (1) and no ORDER BY, so the visiting order (and
--     therefore the cluster numbering) is whatever order the engine returns.

-- A SELECT assignment that returns no row leaves the variables untouched, so
-- @p is always reset before it is used as the "nothing left" signal.
SET @p = NULL
SELECT TOP (1) @p = p, @x = x, @y = y FROM #T WHERE c = 0

WHILE @p IS NOT NULL
BEGIN
	-- The seed is now processed, whatever the outcome of the density test.
	UPDATE #T SET c = 1 WHERE p = @p

	-- Density test for the seed.
	SELECT @cnt = COUNT(*)
	FROM #T
	WHERE SQRT(POWER(@x - x, 2) + POWER(@y - y, 2)) <= @eps

	IF @cnt >= @MinPts
	BEGIN
		SET @i = @i + 1

		-- Start the member list with the seed's unlabelled neighbours.
		-- The seed itself is flagged as already examined.
		INSERT INTO #M (p, x, y, c)
		SELECT t.p, t.x, t.y, CASE WHEN t.p = @p THEN 1 ELSE 0 END
		FROM #T AS t
		WHERE t.j IS NULL
		  AND SQRT(POWER(@x - t.x, 2) + POWER(@y - t.y, 2)) <= @eps

		-- Grow the cluster: examine every member exactly once.
		SET @p = NULL
		SELECT TOP (1) @p = p, @x = x, @y = y FROM #M WHERE c = 0

		WHILE @p IS NOT NULL
		BEGIN
			UPDATE #M SET c = 1 WHERE p = @p

			SELECT @cnt = COUNT(*)
			FROM #T
			WHERE SQRT(POWER(@x - x, 2) + POWER(@y - y, 2)) <= @eps

			-- Only a dense member brings its neighbours along.
			IF @cnt >= @MinPts
			BEGIN
				INSERT INTO #M (p, x, y, c)
				SELECT t.p, t.x, t.y, 0
				FROM #T AS t
				WHERE t.j IS NULL
				  AND SQRT(POWER(@x - t.x, 2) + POWER(@y - t.y, 2)) <= @eps
				  AND NOT EXISTS (SELECT 1 FROM #M AS m WHERE m.p = t.p)
			END

			SET @p = NULL
			SELECT TOP (1) @p = p, @x = x, @y = y FROM #M WHERE c = 0
		END

		-- Label every member and mark it processed so it is never used as a seed.
		UPDATE t
		SET c = 1,
		    j = 'C' + CONVERT(VARCHAR(10), @i)
		FROM #T AS t
		INNER JOIN #M AS m ON m.p = t.p
	END

	-- Empty the work table for the next seed.
	TRUNCATE TABLE #M

	SET @p = NULL
	SELECT TOP (1) @p = p, @x = x, @y = y FROM #T WHERE c = 0
END

-- Show the final state of every point: id, coordinates, cluster label (j)
-- and processed flag (c).
SELECT p, x, y, j, c
FROM #T


-- Remove both temp tables.
DROP TABLE #M, #T

  算法描述:https://blog.csdn.net/dsdaasaaa/article/details/94590159

  • 3
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
下面是st-dbscan聚类算法的Python代码实现:

```python
import numpy as np
from scipy.spatial.distance import pdist, squareform


def stdbscan(data, eps, min_pts):
    """
    ST-DBSCAN algorithm implementation.

    Parameters:
        data (ndarray): Input data.
        eps (float): The maximum distance between two samples for them to be considered as in the same neighborhood.
        min_pts (int): The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.

    Returns:
        labels (ndarray): Cluster labels for each point. -1 for noise points.
    """
    # Compute pairwise distance matrix
    dist_mat = squareform(pdist(data))

    # Initialize variables
    num_pts = data.shape[0]
    visited = np.zeros(num_pts, dtype=bool)
    labels = np.zeros(num_pts, dtype=int)
    cluster_id = 0

    # Iterate over all points
    for i in range(num_pts):
        if not visited[i]:
            visited[i] = True
            # Get neighbors within eps distance
            neighbors = np.where(dist_mat[i] <= eps)[0]
            # Check if there are enough neighbors
            if len(neighbors) < min_pts:
                labels[i] = -1  # Noise point
            else:
                cluster_id += 1
                labels[i] = cluster_id
                # Expand cluster
                j = 0
                while j < len(neighbors):
                    neighbor = neighbors[j]
                    if not visited[neighbor]:
                        visited[neighbor] = True
                        # Get neighbors within eps distance
                        new_neighbors = np.where(dist_mat[neighbor] <= eps)[0]
                        # Check if there are enough neighbors
                        if len(new_neighbors) >= min_pts:
                            neighbors = np.concatenate((neighbors, new_neighbors))
                    # Assign to cluster
                    if labels[neighbor] == 0:
                        labels[neighbor] = cluster_id
                    j += 1

    return labels
```

其中,`data`是输入数据,`eps`是邻域半径(两点之间的距离不超过该值时互为邻居),`min_pts`是最小密度阈值(某点的邻域内点数(含该点自身)不少于该值时,该点才是核心点)。函数返回每个点的聚类标签,-1表示噪声点。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值