如何在无限级分类表中，定位准确的分类项（五：使用变量存储正则代替地域表读取）（以城市地域为例）（mssql）

本文链接：https://blog.csdn.net/superwfei/article/details/103197065

之前的函数，基本上满足了获取地域信息的需求，但是，存在的最严重的问题就是效率问题，每次调用PickCityId函数时，都要从新读取一遍dict_cities表，这个效率就很感人了，所以，优化势在必行

考虑到之前已经建立了正则clr，老顾觉得，用正则作为优化依据比较靠谱

比如这样一个正则“(?#1:0|215|1)北京|(?#3:0|155|3)上海”，可以直接进行匹配北京和上海的信息，而?#为正则注释，这个里边的内容不参与正则运算，但匹配后，我们可以对原始正则字符串再次匹配，比如匹配到北京，我们可以根据原始正则字符串，拿到注释里的内容，嗯，就是这么个思路，这样，每次使用PickCityId方法前，只需要读一次数据库，用来生成正则即可

CREATE FUNCTION [dbo].[CityIdReg] 
(
)
RETURNS nvarchar(max)
AS
BEGIN
	DECLARE @reg nvarchar(max)

	select @reg = stuff((
			select '|(?#'+convert(varchar,city_id)+':'+city_path+')'+n
			from dict_cities ci with (nolock) 
			cross apply (
				select isnull(
				(case 
					when master.dbo.regexismatch(isnull(abbr,''),'[a-z]')=1 
					then city_name else abbr end
				)
				,master.dbo.regexreplace(city_name,'[矿新]区$|自治.*$|[县区]$','')) as n
			) b
			where city_del=0 
			and isnull(targetId,0)=0
			and datalength(n)>0
			order by n desc  -- 为了避免出现异常，对条目进行排序，避免出现深圳匹配不到，却匹配到深县（因为县字已被删除）
			for xml path('')
		),1,1,'')

	RETURN @reg

END

通过一个自定义函数，直接返回一个很长的正则字符串，除匹配内容外，还包含了地域ID和path，嗯，都在注释里

然后，改造一下我们的PickCityId函数

ALTER FUNCTION [dbo].[PickCityId] 
(
	@xml xml,@reg nvarchar(max)
)
RETURNS int
AS
BEGIN
	DECLARE @id int

	declare @d table(addr nvarchar(max),wsn int)
	insert into @d
	select 
	replace(match,'杨凌','杨陵') as addr
	,sn as wsn
	from master.dbo.regexmatches(convert(nvarchar(max),@xml),'(?<=\w+=")[^"]+')

	declare @t2 table(
		addr nvarchar(max),wsn int,city_id int,city_path varchar(100),loc nvarchar(100)
		,sn int,match nvarchar(100),pos int,mlen int
	)
	insert into @t2
	select addr,wsn,city_id,city_path,b.match,b.sn,b.match,b.[index],datalength(b.match) 
	from @d a
	cross apply (
		select * from master.dbo.RegexMatches(addr,@reg)
	) b
	cross apply (
		-- 因为有多个类似朝阳、钟山、海南这样的结果，所以将所有结果都提取出来
		select match as n 
		from master.dbo.RegexMatches(@reg,'(?<=^|[\|])\([^\)]+\)'+b.match+'(?![东南西北中]?大?[路街村镇乡]|大道)'+'(?=$|[\|])')
	) c
	cross apply (
		select master.dbo.RegexMatch(n,'(?<=\(\?#)\d+') as city_id
		,master.dbo.RegexMatch(n,'(?<=\(\?#\d+:)[\d\|]+') as city_path
	) d

	declare @t3 table(city_id int,city_path varchar(100),sn int,pos int,mlen int,lv int,wsn int,match nvarchar(100))
	insert into @t3
	select city_id,city_path,sn,pos,mlen,lv,wsn,match 
	from @t2
	cross apply (
		select count(0) as lv 
		from master.dbo.splitstr(city_path,'|')
	) b
	where lv>2 and master.dbo.regexismatch(match,'^[东南西北中]$')=0

	declare @t4 table(city_id int,city_path varchar(100),sn int,pos int,mlen int,lv int,wsn int,match nvarchar(100))
	insert into @t4
	select top 1 * from @t3 order by pos,mlen desc,lv

	declare @t5 table(city_id int,city_path varchar(100),sn int,pos int,mlen int,lv int,wsn int,match nvarchar(100))
	insert into @t5
	select t3.*
	from @t4 t4
	left join @t3 t3 on charindex(t4.city_path,t3.city_path)>0
	where not (t3.lv<>t4.lv and t3.pos=t4.pos and t3.mlen<=t4.mlen and t3.wsn=t4.wsn)

	;with t6 as (
		select *,(case 
			when exists(select top 1 1 from @t5 where match=a.match and lv<a.lv) then 1 
			when exists(select top 1 1 from @t5 where charindex(a.match,match)>0 and lv<a.lv) then 2 
			else 0 end
		) as same 
		from @t5 a
		cross apply (
			select count(0) as p
			from @t5 
			where city_id<>a.city_id and charindex(city_path,a.city_path)>0
		) b
		where not exists(
			select top 1 1 
			from @t5 
			where pos=a.pos and lv<a.lv and wsn=a.wsn and match=a.match and lv>3
		)
	)
	select @id = (
		select top 1 city_id
		from t6 a
		order by a.p desc,same,pos,mlen desc
	)

	RETURN @id
END

好了，第一次优化完成