我有两个数据表,其中包含如下数据:-
| id | name | dob | | name | dob |
|-------|------|----------| |------|----------|
| 12345 | ABC | 20010301 | | ABC | 20010301 | - matching record
| 45678 | DEF | 20010425 | | XYZ | 20010301 | - unmatched record
是否可以编写一个比较两个表的查询,然后创建一个匹配和不匹配的表,而只保留原始表结构/数据?
Match Table Unmatched Table
| id | rank | | id | rank |
|-------|------| |-------|------|
| 12345 | 1 | | 45678 | NULL |
我正在尝试使用 MERGE,但我必须插入/更新一个源表,并且在 tsql 方面我已经达到了上限——我还将处理超过 30,000,000 行的数据集——有什么建议/建议吗?
我目前的sql(字段不匹配但原理有)如下?
Create TABLE #Cohort ([ID] varchar(4),[match rank] int)
INSERT INTO #Cohort ([ID],[match rank]) VALUES('aaaa',NULL)
INSERT INTO #Cohort ([ID],[match rank]) VALUES('bbbb',NULL)
INSERT INTO #Cohort ([ID],[match rank]) VALUES('cccc',NULL)
INSERT INTO #Cohort ([ID],[match rank]) VALUES('dddd',NULL)
Create TABLE #link ([ID] varchar(4),[match rank] int)
INSERT INTO #link ([ID],[match rank]) VALUES(left(NEWID(),4),NULL)
INSERT INTO #link ([ID],[match rank]) VALUES(left(NEWID(),4),NULL)
INSERT INTO #link ([ID],[match rank]) VALUES('aaaa',NULL)
INSERT INTO #link ([ID],[match rank]) VALUES(left(NEWID(),4),NULL)
Create TABLE #Matches ([ID] varchar(4),[match rank] int)
Create TABLE #Unmatched ([ID] varchar(4),[match rank] int)
MERGE #Cohort tg
USING (SELECT distinct c.[ID], 1 as [match rank]
from #Cohort c
INNER JOIN #link as h on c.[ID]=h.[ID]) sc
ON (tg.[ID] = sc.[ID] )
WHEN NOT MATCHED BY TARGET
THEN INSERT([ID],[match rank]) VALUES(sc.[ID],sc.[match rank])
WHEN NOT MATCHED BY SOURCE
THEN DELETE
OUTPUT Deleted.* INTO #Unmatched;