第 1 步:将关键字分成适当的关系(表格)。
CREATE TABLE Keywords (KeywordID int IDENTITY(1,1) NOT NULL
, Keyword NVARCHAR(256)
, constraint KeywordsPK primary key (KeywordID)
, constraint KeywordsUnique unique (Keyword));
第 2 步:将公司和标签之间的多对多关系映射到单独的表中,就像所有多对多关系一样:
CREATE TABLE CompanyKeywords (
CompanyID int not null
, KeywordID int not null
, constraint CompanyKeywords primary key (KeywordID, CompanyID)
, constraint CompanyKeyword_FK_Companies
foreign key (CompanyID)
references Companies(CompanyID)
, constraint CompanyKeyword_FK_Keywords
foreign key (KeywordID)
references Keywords (KeywordID));
第 3 步:使用简单的 GROUP BY 查询生成“云”(例如,将“云”表示最常见的 100 个标签):
with cte as (
SELECT TOP 100 KeywordID, count(*) as Count
FROM CompanyKeywords
group by KeywordID
order by count(*) desc)
select k.Keyword, c.Count
from cte c
join Keyword k on c.KeywordID = k.KeywordID;
第 4 步:缓存结果,因为它很少更改并且计算成本很高。