sql-server-2008 - sql server中的拆分和匹配行

Question

我正在使用 SQL Server 2008

我的表如下所示：

ID      Column
----------------------------------
1       This is a Sample Text
2       Sample Text is typed here
3       Here the sample text is
4       Typing a sample

我需要输出是这样的：

ID Column                     MostCommon  Common1  Common2  NonCommon
---------------------------------------------------------------------------------------
1  This is a Sample Text      Sample      Text     is       This a
2  Sample Text is typed here  Sample      Text     is       typed here
3  Here the sample text is    Sample      Text     is       Here the
4  Typing a sample            sample      NULL     NULL     Typing A

谁能帮我在 SQL Server 2008 中编写一个 sp/function/query

'sample' 出现在所有行中，所以可以把它作为最常见的单词（MostCommon）；'text' 和 'is' 是次常见的单词，可以在第 1、2、3 行中找到。其余单词在其他行中都没有匹配，因此归入非常见（NonCommon）一列。

score 0 · Accepted Answer

你可以这样做：首先创建一个拆分字符串的函数，然后统计每个单词的出现次数，并按需要显示结果。由于你希望显示数量可变的列，实现会更复杂一些（需要用动态 SQL 配合 PIVOT）：

-- Prerequisite: inline table-valued function that the script below calls as
-- dbo.SplitStrings_XML(<text>, N' ').
-- It wraps @List in <i>...</i>, replaces every @Delimiter with '</i><i>',
-- converts the result to XML and returns one row per <i> node, exposing the
-- node's first text value as Item (nvarchar(4000)).
-- The definition is kept commented out; presumably it is meant to be created
-- once, separately, before running the script -- confirm.
-- NOTE(review): @List is converted to XML without escaping, so input containing
-- XML-special characters (&, <) presumably makes CONVERT(XML, ...) fail -- confirm.
/*
CREATE FUNCTION dbo.SplitStrings_XML
(
   @List       NVARCHAR(MAX),
   @Delimiter  NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
   RETURN 
   (  
      SELECT Item = y.i.value('(./text())[1]', 'nvarchar(4000)')
      FROM 
      ( 
        SELECT x = CONVERT(XML, '<i>' + 
                   REPLACE(@List, @Delimiter, '</i><i>') + '</i>').query('.')
      ) AS a CROSS APPLY x.nodes('i') AS y(i)
   );
GO
*/

-- Sample input: one sentence per ID, as given in the question.
CREATE TABLE #t
(
    ID  INT,
    Col VARCHAR(1000)
);

INSERT INTO #t (ID, Col)
SELECT 1, 'This is a Sample Text'     UNION ALL
SELECT 2, 'Sample Text is typed here' UNION ALL
SELECT 3, 'Here the sample text is'   UNION ALL
SELECT 4, 'Typing a sample';

-- Threshold for treating a word as "common". The statements that use it
-- compare with cnt > @MinimumNumberOfOccurances, so the bound is exclusive:
-- with the value 2 a word needs a count of at least 3.
DECLARE @MinimumNumberOfOccurances INT;
SET @MinimumNumberOfOccurances = 2;

-- One row per (sentence, word): split every Col on a single space and keep
-- the originating ID and full sentence next to each resulting Item.
SELECT
    src.ID,
    src.Col,
    word.Item
INTO #SplitedStrings
FROM #t AS src
CROSS APPLY dbo.SplitStrings_XML(src.Col, N' ') AS word;


-- Word frequencies: one row per distinct word with cnt = the number of
-- different IDs (sentences) that contain it.
-- COUNT(DISTINCT ID) is used instead of COUNT(*) so that a word repeated
-- inside a single sentence is counted once for that sentence; "common" is
-- meant as "appears in many rows", not "appears many times".
-- The words are read from #SplitedStrings, which already holds the split
-- result, instead of running the XML split over #t a second time.
SELECT
    s.Item,
    COUNT(DISTINCT s.ID) AS cnt
INTO #SplitedStringsGrouped
FROM #SplitedStrings AS s
GROUP BY s.Item;

-- Every split word together with its frequency, but only when the word
-- exceeds the threshold; all other words keep cnt = NULL because the
-- threshold test is part of the outer-join condition, not of a WHERE clause.
SELECT
    s.ID,
    s.Col,
    s.Item,
    g.cnt
INTO #ResultTable
FROM #SplitedStrings AS s
LEFT JOIN #SplitedStringsGrouped AS g
    ON  g.Item = s.Item
    AND g.cnt > @MinimumNumberOfOccurances
ORDER BY
    s.ID,
    g.cnt DESC,
    LEN(g.Item) DESC;

-- Comma-separated, bracket-quoted list of the common words, used as the
-- PIVOT ... IN (...) column list of the dynamic statement.
--  * NVARCHAR(MAX): Item is nvarchar, and a fixed VARCHAR(1000) would silently
--    truncate the list (and therefore break the dynamic SQL) for larger inputs.
--  * QUOTENAME escapes any ']' inside a word; it returns NULL for input longer
--    than 128 characters, which is also the maximum identifier length.
--  * TYPE + .value() returns the concatenation as plain text, so characters
--    such as '&' or '<' are not turned into XML entities.
-- The result is NULL when no word exceeds the threshold.
DECLARE @ColumnNames NVARCHAR(MAX) = STUFF(
(
    SELECT  N',' + QUOTENAME(Item)
    FROM    #SplitedStringsGrouped
    WHERE   cnt > @MinimumNumberOfOccurances
    FOR     XML PATH(''), TYPE
).value('.', 'nvarchar(max)')
, 1, 1, N'');

-- SELECT-list fragment for the dynamic statement: one
-- "MAX([word]) AS [CommonN]" entry per common word. N starts at 0, so
-- Common0 is the most frequent word; ties on cnt are broken by the longer
-- word first and then by the word itself, which makes the numbering repeatable.
--  * NVARCHAR(MAX) avoids truncating the fragment at 1000 characters.
--  * QUOTENAME escapes ']' inside a word (NULL for input over 128 characters).
--  * ORDER BY rn emits the columns in rank order instead of an arbitrary one.
--  * TYPE + .value() keeps the text free of XML entities.
-- The result is NULL when no word exceeds the threshold.
DECLARE @TableHeader NVARCHAR(MAX) = STUFF(
(
    SELECT  N',MAX(' + QUOTENAME(ranked.Item) + N') AS [Common'
            + CAST(ranked.rn - 1 AS VARCHAR(10))
            + N']'
    FROM
    (
        SELECT  Item,
                ROW_NUMBER() OVER
                    (ORDER BY cnt DESC, LEN(Item) DESC, Item) AS rn
        FROM    #SplitedStringsGrouped
        WHERE   cnt > @MinimumNumberOfOccurances
    ) AS ranked
    ORDER BY ranked.rn
    FOR     XML PATH(''), TYPE
).value('.', 'nvarchar(max)')
, 1, 1, N'');

-- Words that did not qualify as common: rows of #ResultTable whose cnt is NULL.
-- Num numbers the words inside each ID; the window is ordered by its own
-- partition key, so which word receives which number is not defined.
SELECT
    r.ID,
    r.Item,
    ROW_NUMBER() OVER (PARTITION BY r.ID ORDER BY r.ID) AS Num
INTO #NonCommon
FROM #ResultTable AS r
WHERE r.cnt IS NULL;

-- Build and run the final statement: one output row per ID with the original
-- sentence, one column per common word (via PIVOT) and the space-separated
-- list of the remaining words in NonCommon.
--  * NVARCHAR(MAX) (seeded with a MAX-typed empty string so the concatenation
--    is not capped) keeps long column lists from truncating the statement.
--  * When there is no common word the column list is NULL; PIVOT cannot take
--    an empty IN list and EXEC of a NULL string returns nothing, so a plain
--    grouped query is run instead and every sentence is still reported.
--  * TYPE + .value() returns NonCommon as plain text (no XML entities).
--  * ORDER BY nc.Num makes the word list repeatable for a given #NonCommon;
--    Num does not encode the position of the word in the sentence.
DECLARE @sql NVARCHAR(MAX);

IF @ColumnNames IS NULL OR @TableHeader IS NULL
BEGIN
    SET @sql = CAST(N'' AS NVARCHAR(MAX)) + N'
SELECT  src.ID,
        MAX(src.Col) AS [Column],
        RTRIM((
            SELECT  nc.Item + '' ''
            FROM    #NonCommon AS nc
            WHERE   nc.ID = src.ID
            ORDER BY nc.Num
            FOR     XML PATH(''''), TYPE
        ).value(''.'', ''nvarchar(max)'')) AS NonCommon
FROM    #ResultTable AS src
GROUP BY src.ID
';
END
ELSE
BEGIN
    SET @sql = CAST(N'' AS NVARCHAR(MAX)) + N'
SELECT  MAX(pvt.ID) AS ID,
        MAX(pvt.Col) AS [Column],
        ' + @TableHeader + N',
        RTRIM((
            SELECT  nc.Item + '' ''
            FROM    #NonCommon AS nc
            WHERE   nc.ID = pvt.ID
            ORDER BY nc.Num
            FOR     XML PATH(''''), TYPE
        ).value(''.'', ''nvarchar(max)'')) AS NonCommon
FROM    #ResultTable AS src
PIVOT   (
    MAX(Item) FOR Item IN (' + @ColumnNames + N')
) AS pvt
GROUP BY pvt.ID
';
END;

EXEC (@sql);

-- Cleanup. Each drop is guarded so that a run which failed before creating
-- some of the temp tables does not raise further errors here
-- (DROP TABLE IF EXISTS is not available in SQL Server 2008).
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;
IF OBJECT_ID('tempdb..#SplitedStringsGrouped') IS NOT NULL
    DROP TABLE #SplitedStringsGrouped;
IF OBJECT_ID('tempdb..#SplitedStrings') IS NOT NULL
    DROP TABLE #SplitedStrings;
IF OBJECT_ID('tempdb..#ResultTable') IS NOT NULL
    DROP TABLE #ResultTable;
IF OBJECT_ID('tempdb..#NonCommon') IS NOT NULL
    DROP TABLE #NonCommon;

sql-server-2008 - sql server中的拆分和匹配行

1 回答 1

Related

Reference