创建这个函数:
-- Extracts the file name of every ".pdf" reference found in @s (typically the
-- HTML of one or more <a href="..."> tags) and returns one row per occurrence.
--
-- Parameters:
--   @s    text to scan. NULL, or text containing no ".pdf", yields no rows.
-- Returns:
--   @res  one row per ".pdf" occurrence, in scan order. pdf holds the text
--         between the nearest preceding '/', '"' or ''' (or the start of the
--         remaining text when none of them is present) and the end of ".pdf".
--
-- Assumes there are no single quotes or double quotes in the PDF file name.
-- Whether ".PDF" in other letter case matches depends on the collation that
-- CHARINDEX uses for the input.
create function dbo.extract_filenames_from_a_tags (@s nvarchar(max))
returns @res table (pdf nvarchar(max)) as
begin
    declare @i int, @j int, @k int, @tmp nvarchar(max), @rev nvarchar(max);

    set @i = charindex(N'.pdf', @s);
    while @i > 0
    begin
        -- Everything up to and including the current ".pdf".
        set @tmp = left(@s, @i + 3);
        -- Reverse once: the first hit in the reversed text is the delimiter
        -- closest to the file name.
        set @rev = reverse(@tmp);

        set @j = charindex(N'/', @rev);   -- directory delimiter
        set @k = charindex(N'"', @rev);   -- start of href (double quote)
        if @j = 0 or (@k > 0 and @k < @j) set @j = @k;
        set @k = charindex(N'''', @rev);  -- start of href (single quote)
        if @j = 0 or (@k > 0 and @k < @j) set @j = @k;

        -- No delimiter at all: the name starts at the beginning of @tmp.
        -- Without this, the start position below falls past the end of @tmp
        -- and an empty string would be stored instead of the name.
        if @j = 0 set @j = len(@tmp) + 1;

        -- @j is the 1-based distance of the delimiter from the end of @tmp,
        -- so the name is the last (@j - 1) characters.
        insert @res (pdf)
        values (substring(@tmp, len(@tmp) - @j + 2, @j - 1));

        -- Drop the scanned prefix: through ".pdf" plus the one character that
        -- follows it (normally the closing quote of the href).
        set @s = stuff(@s, 1, @i + 4, N'');
        set @i = charindex(N'.pdf', @s);
    end
    return
end
GO
使用该函数的演示:
-- Demo: two sample HTML fragments, the second containing two links (one
-- double-quoted without a path, one single-quoted with a path).
-- The column is nvarchar(max) to match the function's parameter type.
declare @t table (html nvarchar(max));

insert into @t (html)
values
    (N'
<p>A deferred tuition payment plan,
or view the <a href="/uploadedFiles/Tuition-Reimbursement-Deferred.pdf"
target="_blank">list</a>.</p>'),
    (N'
<p>A deferred tuition payment plan,
or view the <a href="Two files here-Reimbursement-Deferred.pdf"
target="_blank">list</a>.</p>And I use single quotes
<a href=''/look/path/The second file.pdf''
target="_blank">list</a>');

-- One output row per PDF file name found in each HTML fragment.
select
    t.html,
    p.pdf
from @t as t
cross apply dbo.extract_filenames_from_a_tags(t.html) as p;
结果:
|HTML | PDF |
--------------------------------------------------------------------
|<p>A deferred tui.... | Tuition-Reimbursement-Deferred.pdf |
|<p>A deferred tui.... | Two files here-Reimbursement-Deferred.pdf |
|<p>A deferred tui.... | The second file.pdf |
SQL Fiddle 演示