我先根据您的测试用例创建了测试数据,并补充了一些我在编写答案过程中想到的额外用例:
set nocount on
go
--Test fixture: a temp table of sample URLs used to exercise dbo.GetDomain.
--Drop any leftover copy first so the script can be re-run in the same session.
if object_id('tempdb..#log') is not null
    drop table #log
go
create table #log (
    url varchar(1024) not null --same width as the @url parameter of dbo.GetDomain
)
go
--Explicit column lists keep the inserts valid if #log ever gains more columns.
insert into #log (url) values('http://www.groupon.com/')
insert into #log (url) values('http://www.groupon.com/today')
insert into #log (url) values('https://groupon.com/deals')
insert into #log (url) values('http://www.wikihow.com/Main-Page')
insert into #log (url) values('http://www.wikihow.com/Main-Page') --deliberate duplicate row
insert into #log (url) values('https://www.google.com/imghp')     --the special-cased URL
insert into #log (url) values('http://google.com/flights')
insert into #log (url) values('http://www.amazon.com/cameras')
insert into #log (url) values('https://www.amazon.co.uk/cameras') --multi-part suffix (.co.uk)
go
然后创建了下面这个函数:
--dbo.GetDomain: reduce a URL to its domain, e.g. 'http://www.groupon.com/today' -> 'groupon.com'.
--
--Parameters:
--  @url  the URL to reduce; an optional http:// or https:// scheme is accepted.
--Returns:
--  * 'google.com/imghp' when the URL contains that text (explicit special case);
--  * otherwise the host with leading sub-domain labels removed, keeping two labels
--    ('amazon.com') or three when the host ends in '.co.uk' ('amazon.co.uk');
--  * a host made only of digits and dots (IPv4 literal) is returned whole;
--  * NULL when @url is NULL.
--Path, query string, fragment, userinfo ('user:pass@') and a numeric port are discarded.
create function dbo.GetDomain(@url varchar(1024))
returns varchar(1024) as begin
    declare @returnValue varchar(1024)
    declare @position int
    declare @pattern varchar(50)

    if charindex('google.com/imghp', @url) > 0 --the one URL whose path must be kept
        set @returnValue = 'google.com/imghp'
    else begin
        set @url = ltrim(rtrim(@url))

        --strip the scheme only when it is a real prefix, never from the middle of the URL
        if lower(left(@url, 8)) = 'https://'
            set @url = substring(@url, 9, 1024)
        else if lower(left(@url, 7)) = 'http://'
            set @url = substring(@url, 8, 1024)

        --the host part ends at the first '/', '?' or '#', whichever comes first
        set @position = patindex('%[/?#]%', @url)
        if @position > 0
            set @url = left(@url, @position - 1)

        --drop userinfo: keep only what follows the last '@'
        set @position = charindex('@', reverse(@url))
        if @position > 0
            set @url = right(@url, @position - 1)

        --drop a trailing ':port'; only when the tail is all digits, so a bracketed
        --IPv6 literal such as '[::1]' is left alone. Nested IFs are used because
        --T-SQL does not guarantee short-circuit evaluation of AND.
        set @position = charindex(':', reverse(@url))
        if @position > 0
            if right(@url, @position - 1) not like '%[^0-9]%'
                set @url = left(@url, len(@url) - @position)

        --number of labels to keep is encoded as a LIKE pattern: while the host still
        --matches it, there is at least one label too many
        set @pattern = case
            when @url like '%.co.uk' then '%.%.%.%' --repeat this line for any other exceptions
            else '%.%.%'
        end

        --skip trimming for hosts made only of digits and dots (IPv4 addresses)
        if @url like '%[^0-9.]%'
            while patindex(@pattern, @url) > 0
                set @url = substring(@url, charindex('.', @url) + 1, 1024) --drop the leftmost label

        set @returnValue = @url
    end

    return @returnValue
end
go
按如下方式调用该函数:
--Show each logged URL next to the domain dbo.GetDomain extracts from it.
--The column is named explicitly (rather than *) so the result shape stays
--fixed if #log gains columns.
select
    l.url,
    dbo.GetDomain(l.url) as domain
from #log as l
go
得到如下结果:
url domain
http://www.groupon.com/ groupon.com
http://www.groupon.com/today groupon.com
https://groupon.com/deals groupon.com
http://www.wikihow.com/Main-Page wikihow.com
http://www.wikihow.com/Main-Page wikihow.com
https://www.google.com/imghp google.com/imghp
http://google.com/flights google.com
http://www.amazon.com/cameras amazon.com
https://www.amazon.co.uk/cameras amazon.co.uk