0

我有两个重叠数据的表。一张表大约是另一张表的 90% 重复。我需要识别表中 10% 的唯一记录并将其移动到它的父表。这两个表都是 4 亿多行,300 多列。我正在尝试的方法是添加一个标志字段来唯一标识我需要传输的记录,但是我需要更新该字段并且正在努力解决逻辑问题。以下是我到目前为止汇总的内容,它导致了一个永无止境的循环。两个表中都没有空值。

Declare @counter int 
Declare @RowsEffected int 
Declare @RowsCnt int 
Declare @Err int
SELECT @COUNTER = 1
SELECT @RowsEffected = 0

while (@counter > 0)
begin
set Rowcount 10000000

update Table1
set Existsflg = 1
where exists (
Select Fields
from Table1
Except
Select Fields
from table2 )

Select @RowsCnt = @@ROWCOUNT , @Err = @@ERROR
If @Err <> 0
begin
Print 'Problem Updating the records'
end
IF @RowsCnt = 0
SELECT @COUNTER = 0 
ELSE
SELECT @RowsEffected = @RowsEffected + @RowsCnt 
PRINT 'The total number of rows effected :'+convert(varchar,@RowsEffected)     
WAITFOR DELAY '00:00:10'        
END 
SET ROWCOUNT 0
Go

谢谢!

4

2 回答 2

0

我建议您一次分批执行 1M-5M,因为您有大量数据要更新。

在这种情况下我会做的是:

a) 添加名为 Processed (bit) 的新列,该列将为所有已处理的行更新

b)在临时表中选择 1M 行(这可能不需要,但它会让事情变得更干净)

c) 将所有非重复记录插入到其他表中

d) 更新行并将它们标记为进程

于 2013-06-06T10:01:14.517 回答
0

这就是我曾经做过的一次。

我没有使用 RowCount,我使用 Select TOP (N) 和“while exists”

我的“源” dbo.Employee 表在另一台服务器上。

GO
USE [$(DestinationDatabaseName)]
GO





/*

READ ME !!!

Replace
    $(SourceServer).$(SourceDatabaseName)
With                            (the Server and DatabaseName of the SOURCE data)
    (ex:)  [OtherServer].[OtherDatabase]


*/




--SubFolder: SQLReplicateReplacer
print '[uspEmployeeReplicateReplacer]'
go


IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[uspEmployeeReplicateReplacer]') AND type in (N'P', N'PC'))
DROP PROCEDURE [dbo].[uspEmployeeReplicateReplacer]
Go


/*


declare @numberRowsAffected  int
declare @ErrorNumber int

exec [dbo].[uspEmployeeReplicateReplacer] @numberRowsAffected output , @ErrorNumber  output

print @numberRowsAffected 
print @ErrorNumber 
print ''

*/



CREATE PROCEDURE [dbo].[uspEmployeeReplicateReplacer] (
@numberRowsAffected int output  --return
,
@ErrorNumber int output
)

AS


SET NOCOUNT ON


select @ErrorNumber = 0


declare @ErrorTracker int 
declare @insertRowCount int
declare @updateRowCount int 
select @insertRowCount = 0
select @updateRowCount = 0





IF OBJECT_ID('tempdb..#Employeeupdate') IS NOT NULL
begin
        drop table #Employeeupdate
end






CREATE TABLE #Employeeupdate (  
EmployeeKeyID int IDENTITY (1,1),

EmployeeUUID uniqueidentifier,
EmployeeLabel varchar(64),
EmployeeDescription varchar(128)


)



declare @ManualReplicationRowCount int
/* I put this value in a stored procedure, so I could change it in one place */
/* EXEC dbo.uspInternalSettingGetManualReplicationRowCount @ManualReplicationRowCount output */
Select @ManualReplicationRowCount = 1000 


declare @MaximumLoopCounter int 


select @MaximumLoopCounter = 10000



while (@MaximumLoopCounter > 0) and exists 
(


Select

    TOP 1 null


from [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee vart with (nolock)


    where not exists
    (
        select null from dbo.Employee   with (nolock)   --  destinationTable
        Where

            /*
            destinationTable.SOMEUNIQUECOLUMN1 = sourceTable.SOMEUNIQUECOLUMN1
            and
            destinationTable.SOMEUNIQUECOLUMN2 = sourceTable.SOMEUNIQUECOLUMN2
            */

dbo.Employee.EmployeeUUID = vart.EmployeeUUID 


    )

)



BEGIN


    select @MaximumLoopCounter = @MaximumLoopCounter - 1




DELETE FROM #Employeeupdate




Insert into #Employeeupdate  
(   
        EmployeeUUID, 
        EmployeeLabel, 
        EmployeeDescription

)

Select

    TOP (@ManualReplicationRowCount)

        EmployeeUUID, 
        EmployeeLabel, 
        EmployeeDescription


    from [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee vart with (nolock)

    where not exists
    (
        select null from dbo.Employee   with (nolock)   --  destinationTable
        Where

            /*
            destinationTable.SOMEUNIQUECOLUMN1 = sourceTable.SOMEUNIQUECOLUMN1
            and
            destinationTable.SOMEUNIQUECOLUMN2 = sourceTable.SOMEUNIQUECOLUMN2
            */

dbo.Employee.EmployeeUUID = vart.EmployeeUUID 


    )








SET NOCOUNT OFF
Insert into dbo.Employee 
(   
        EmployeeUUID, 
        EmployeeLabel, 
        EmployeeDescription

)

Select

        EmployeeUUID, 
        EmployeeLabel, 
        EmployeeDescription


from 
    #Employeeupdate  


SELECT @insertRowCount = @@ROWCOUNT , @ErrorTracker = @@ERROR

if @ErrorTracker <> 0
    BEGIN

        select @ErrorNumber = @ErrorTracker 
        select @MaximumLoopCounter = 0 --Bail Out !!!
    END



SET NOCOUNT ON




END --End While Loop














/*

SET NOCOUNT OFF

Update dbo.Employee
Set

  --EmployeeUUID = vart.EmployeeUUID,
EmployeeLabel = vart.EmployeeLabel,
EmployeeDescription = vart.EmployeeDescription


From

    dbo.Employee with (nolock) , [$(SourceServer)].[$(SourceDatabaseName)].dbo.Employee vart with (nolock)
Where
        --Relationship
dbo.Employee.EmployeeUUID = vart.EmployeeUUID 




SELECT @updateRowCount = @@ROWCOUNT

SET NOCOUNT ON


*/




SELECT @numberRowsAffected = @insertRowCount + @updateRowCount


print '/#Employeeupdate COUNT/'
print @numberRowsAffected 
print '-------------------------'





IF OBJECT_ID('tempdb..#Employeeupdate') IS NOT NULL
begin
        drop table #Employeeupdate
end







SET NOCOUNT OFF


GO

GRANT EXECUTE ON dbo.uspEmployeeReplicateReplacer TO $(DBUSERNAME)

GO
于 2013-06-05T21:35:55.150 回答