理想情况下,您会将数据标准化并将单词存储在单独的表中。
但是,对于您的即时需求,您首先需要提供一个 UDF 将“desc”拆分为单词。我挖了这个功能:
-- this function splits the provided strings on a delimiter
-- similar to .Net string.Split.
-- I'm sure there are alternatives (such as calling string.Split through
-- a CLR function).
CREATE FUNCTION [dbo].[Split]
(
@RowData NVARCHAR(MAX),
@Delimeter NVARCHAR(MAX)
)
RETURNS @RtnValue TABLE
(
ID INT IDENTITY(1,1),
Data NVARCHAR(MAX)
)
AS
BEGIN
DECLARE @Iterator INT
SET @Iterator = 1
DECLARE @FoundIndex INT
SET @FoundIndex = CHARINDEX(@Delimeter,@RowData)
WHILE (@FoundIndex>0)
BEGIN
INSERT INTO @RtnValue (data)
SELECT
Data = LTRIM(RTRIM(SUBSTRING(@RowData, 1, @FoundIndex - 1)))
SET @RowData = SUBSTRING(@RowData,
@FoundIndex + DATALENGTH(@Delimeter) / 2,
LEN(@RowData))
SET @Iterator = @Iterator + 1
SET @FoundIndex = CHARINDEX(@Delimeter, @RowData)
END
INSERT INTO @RtnValue (Data)
SELECT Data = LTRIM(RTRIM(@RowData))
RETURN
END
然后您需要拆分描述并进行一些分组(如果数据已标准化,您也可以这样做)
-- get the count of each grp_id
with group_count as
(
select grp_id, count(*) cnt from [Group]
group by grp_id
),
-- get the count of each word in each grp_id
group_word_count as
(
select count(*) cnt, grp_id, data from
(
select * from [group] g
cross apply dbo.Split(g.[Desc], ' ')
)
t
group by grp_id, data
)
-- return rows where number of grp_id = number of words in grp_id
select gwc.GRP_ID, gwc.Data [Desc] from group_word_count gwc
inner join group_count gc on gwc.GRP_ID = gc.GRP_ID and gwc.cnt = gc.cnt
[Group] 是您的桌子。