我在嵌套关系的 excel 数据导入中遇到性能缓慢的问题。
我有两个要插入的主表和其他四个表,它们与主表具有一对多和多对多的关系。
在为两个主表插入数据之前,我检查了四个表的导入数据是现有的还是新的,因为导入的文件可能具有四个表的相同数据,并且相同的数据不能插入多次。
这就是性能缓慢的原因。
我该如何解决这个问题?
我在嵌套关系的 excel 数据导入中遇到性能缓慢的问题。
我有两个要插入的主表和其他四个表,它们与主表具有一对多和多对多的关系。
在为两个主表插入数据之前,我检查了四个表的导入数据是现有的还是新的,因为导入的文件可能具有四个表的相同数据,并且相同的数据不能插入多次。
这就是性能缓慢的原因。
我该如何解决这个问题?
使用 .NET 的 File.ReadAllLines() 方法将整个文件读入字符串数组,然后运行 Parallel.For 循环并行处理所有行。
/// <summary>
/// Reads every file in <paramref name="FolderPath"/> whose name matches
/// <paramref name="FileExtension"/> and hands each line to
/// <c>InsertDataCheck</c>, processing the lines of one file in parallel.
/// </summary>
/// <param name="FolderPath">Directory whose files are scanned.</param>
/// <param name="FileExtension">Search pattern passed to <c>DirectoryInfo.GetFiles</c>.</param>
/// <returns>
/// <c>true</c> when every matching file was processed without an exception;
/// <c>false</c> when an exception was caught (its message is written to the console).
/// </returns>
private bool ProcessFile(string FolderPath, string FileExtension)
{
    try
    {
        // All files with the requisite file extension.
        DirectoryInfo dinfo = new DirectoryInfo(FolderPath);
        foreach (FileInfo file in dinfo.GetFiles(FileExtension))
        {
            // Load the whole file up front so the lines can be indexed in parallel.
            string[] allLines = File.ReadAllLines(file.FullName);
            Parallel.For(0, allLines.Length, i => InsertDataCheck(allLines[i]));
        }
        return true;
    }
    catch (AggregateException ex)
    {
        // Parallel.For wraps worker failures; the wrapper's own Message is generic,
        // so report each underlying failure instead.
        foreach (Exception inner in ex.Flatten().InnerExceptions)
        {
            Console.WriteLine(inner.Message);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return false;
}
/// <summary>
/// Placeholder for the per-line import logic; the body is intentionally empty here.
/// ProcessFile invokes this from a Parallel.For loop, so an implementation
/// will be called concurrently for different lines.
/// </summary>
/// <param name="Line">One raw line read from the input file.</param>
private void InsertDataCheck(string Line)
{
//check if you want to insert data on the basis of your condition
//and then insert your data
}
我不得不使用包含数百万条记录的批量数据来处理这种情况。从许多浪费的经验中:
1) 尽量不要使用 Excel。它很慢并且会占用大量内存。一张包含 500,000 条记录的工作表最终可能会占用超过 2 GB 的内存来加载文件,而且导入一张工作表需要 30、40、50 分钟甚至更长时间。请考虑将数据转换为 CSV 并使用 SqlBulkCopy。
它可以在数秒到数分钟内处理大量记录,而不是数小时。
2) 在这种情况下,您几乎无法提高 Entity Framework 的性能。我发现最好、最快的方法是将每张工作表加载到数据库中各自的临时表里,然后编写 SQL,把数据批量插入到它们的最终表中。中间插入的结果可以捕获到输出表中,这样您就可以获得从临时表执行连接或向相关表插入数据所需的键。您当然可以“窃取”一些 EF 自动生成的 SQL,但之后需要对其进行微调。
3) 尽管 SQL 讨厌循环,但我编写了我的 sql 语句以在循环中运行并一次插入 100,000 条记录。它使插入运行得更快。
为了给您一个想法,在批量导入每个表单 CSV 后:
首先根据需要定义存储在相关表中的变量和类型:
-- Batch-window bounds and lookup constants used while moving staged rows from
-- TempClassMemberRecords into the final tables.
-- @RecordsPerLoop and @uploadUser are not declared in this section; presumably
-- they are parameters of the enclosing procedure or batch -- TODO confirm.
DECLARE @Max INT = @RecordsPerLoop -- upper rawID bound of the current window
DECLARE @Min INT = 0 -- lower rawID bound of the current window
-- Number of staged rows.
-- NOTE(review): the insert loop compares this row count with rawID-based bounds,
-- which only reaches every row if rawID values are dense -- confirm.
DECLARE @TotalRECORD INT = (
    SELECT count(*)
    FROM TempClassMemberRecords
    )
DECLARE @Country VARCHAR(50)
SET @Country = 'USA'
-- Const variables for class member inserts
DECLARE @DefaultCommPreference VARCHAR(50) = (
    SELECT TOP 1 CommPreference
    FROM Actors
    WHERE PKID = 0
    )
    ,@PrimaryActorTypeId INT = (
    SELECT TOP 1 PKId
    FROM ActorTypes
    WHERE ActorTypeName = 'PrimaryClaimant'
    )
    ,@SecondaryActorTypeId INT = (
    SELECT TOP 1 PKId
    FROM ActorTypes
    WHERE ActorTypeName = 'CoClaimant'
    )
    ,@HomePhoneTypeId INT = (
    SELECT TOP 1 PKId
    FROM PhoneTypes
    WHERE PhoneTypeName = 'Home'
    )
    ,@WorkPhoneTypeId INT = (
    SELECT TOP 1 PKId
    FROM PhoneTypes
    WHERE PhoneTypeName = 'Work'
    )
    -- Matches @Country against either the country name or the country code;
    -- becomes 0 when no row matches.
    ,@PrimaryCountryId INT = IsNull((
    SELECT TOP 1 PKId
    FROM Countries
    WHERE @Country IN (
        CountryName
        ,CountryCode
        )
    ), 0)
    ,@DefaultCountryId INT = IsNull((
    SELECT TOP 1 PKId
    FROM Countries
    WHERE CountryCode = 'USA'
    ), 0)
    ,@SubmitTypeId INT = (
    SELECT TOP 1 PKId
    FROM ClaimSubmitTypes
    WHERE SubmitTypeName = 'Bulk'
    )
    ,@ClaimStatusId INT = (
    SELECT TOP 1 PKId
    FROM ClaimStatusTypes
    WHERE StatusName = 'Active'
    )
    ,@ModifiedBy VARCHAR(20) = @uploadUser
    ,@ModifiedDate DATETIME = GETDATE()
    -- Case code of the row with the highest PKId in Cases.
    ,@CaseCode VARCHAR(50) = (
    SELECT TOP 1 CaseCode
    FROM Cases
    ORDER BY PKId DESC
    ) + ''
    ,@IndividualClaimantType INT = (
    SELECT TOP 1 PKId
    FROM claimanttypes
    WHERE ClaimantTypeName = 'Individual'
    )
    ,@CompanyClaimantType INT = (
    SELECT TOP 1 PKId
    FROM claimanttypes
    WHERE ClaimantTypeName = 'Corporation'
    )
    ,@Checked BIT = 0
    -- Next PKId after the current maximum in Entities; handed to
    -- USP_AssignClassMemberRefNums inside the loop.
    -- MAX() over an empty table is NULL, so fall back to 0 to keep the result
    -- non-NULL when Entities has no rows yet.
    -- The NOLOCK hint means the value can reflect uncommitted rows.
    ,@startingPKId INT = (
    SELECT ISNULL(max(PKId), 0) + 1
    FROM dbo.Entities WITH (NOLOCK)
    );
--Record per group insert: when all staged rows fit in a single window,
--clamp the upper bound to the row count.
IF (@TotalRECORD <= @RecordsPerLoop)
    SET @max = @TotalRECORD
运行你的插入循环:
-- Main loop: each pass handles one window of staged rows.
WHILE (@min <= @TotalRECORD)
BEGIN
-- Begin every pass with fresh scratch tables: remove leftovers, then recreate them.
IF OBJECT_ID('tempdb..#SecondaryActorIds') IS NOT NULL
    DROP TABLE #SecondaryActorIds
IF OBJECT_ID('tempdb..#ActorIds') IS NOT NULL
    DROP TABLE #ActorIds
IF OBJECT_ID('tempdb..#RefNumRepository') IS NOT NULL
    DROP TABLE #RefNumRepository
IF OBJECT_ID('tempdb..#EntityIds') IS NOT NULL
    DROP TABLE #EntityIds
-- Filled by the OUTPUT clause of the Entities insert.
CREATE TABLE #EntityIds (
    pkid INT IDENTITY(1, 1) NOT NULL,
    mid INT,
    eid INT
)
-- Filled by the OUTPUT clause of the Actors insert.
CREATE TABLE #ActorIds (
    pkid INT IDENTITY(1, 1) NOT NULL,
    mid INT,
    aid INT
)
CREATE TABLE #SecondaryActorIds (
    pkid INT IDENTITY(1, 1) NOT NULL,
    mid INT,
    aid INT
)
-- Receives the reference numbers reserved for this pass.
CREATE TABLE #RefNumRepository (
    pkid INT IDENTITY(1, 1) NOT NULL,
    RefNum VARCHAR(50)
)
-- One transaction per window: everything up to COMMIT succeeds or is rolled back together.
BEGIN TRANSACTION
BEGIN TRY
-- Reserve unused reference numbers: flag up to @RecordsPerLoop + 1 rows as used
-- and capture their RefNum values into #RefNumRepository.
-- NOTE(review): #RefNumRepository is not read again in this script; presumably
-- USP_AssignClassMemberRefNums consumes it -- confirm.
UPDATE TOP (@RecordsPerLoop + 1) RefNumRepository
SET IsUsed = 1
OUTPUT deleted.RefNum
INTO #RefNumRepository(RefNum)
WHERE IsUsed = 0;
PRINT 'Entities'
-- Insert one Entities row per unprocessed staged row in the current rawID window.
INSERT INTO Entities (
ModifiedBy
,ModifiedDate
,RecordOwnerName
,IsConflictOfInterest
,FKClaimantTypeId
,OtherClaimantType
,InstitutionAccountNumber
,RefNum
,FKSubmitTypeId
,FKClaimStatusTypeId
,RecordType
,ClaimNum
,FilingDate
,FirstName
,Lastname
,Email
,SSN
,Source
,ClaimDataCertifiedDate
)
-- Source carries the staging rawID (see the select list), so this captures the
-- pair (new Entities PKId, staging rawID) as (eid, mid).
OUTPUT Inserted.pkid
,Inserted.source
INTO #EntityIds(eid, mid)
SELECT @ModifiedBy
,@ModifiedDate
,NULL
,1
-- Rows without a company value are treated as individuals, all others as corporations.
,CASE
WHEN IsNull(company, '') = ''
THEN @IndividualClaimantType
ELSE @CompanyClaimantType
END
,NULL
,NULL
-- RefNum is inserted empty here; presumably assigned by the procedure call below -- confirm.
,''
,@SubmitTypeId
,@ClaimStatusId
,'CM'
,NULL
,@ModifiedDate
,IsNull(fname, '')
,IsNull(lname, '')
,IsNull(Email, '')
,IsNull(ssn, '')
,rawID
,@ModifiedDate
FROM TempClassMemberRecords
WHERE rawID BETWEEN @min
AND @max
AND IsProcessed IS NULL
-- NOTE(review): the procedure body is not visible here; it receives the PKId
-- computed before the loop started.
EXEC dbo.[USP_AssignClassMemberRefNums] @startingPKId
PRINT 'Actors'
-- Bulk insert the primary actor for each unprocessed staged row in the current
-- rawID window that has a company, first name or last name.
-- The staging rawID is stored in the attention column so the OUTPUT clause can
-- capture the pair (new Actors PKId, staging rawID) as (aid, mid).
INSERT INTO Actors (
    FKActorTypeId
    ,ModifiedBy
    ,ModifiedDate
    ,LastName
    ,FirstName
    ,MiddleName
    ,CommPreference
    ,IsPayee
    ,IsUSCitizen
    ,ein
    ,ssn
    ,company
    ,attention
    ,NotificationsBlocked
    ,SearchName
    ,ClientAcctNumber
    )
OUTPUT Inserted.pkid
    ,inserted.attention
INTO #ActorIds(aid, mid)
SELECT @PrimaryActorTypeId
    ,@ModifiedBy
    ,@ModifiedDate
    ,IsNull(lname, '')
    ,IsNull(fname, '')
    ,''
    ,IsNull(@DefaultCommPreference, 'Mail')
    ,1
    ,NULL
    ,IsNull(ein, '')
    ,IsNull(ssn, '')
    ,IsNull(company, '')
    ,rawid
    ,0
    -- SearchName: the company when present, otherwise "last, first".
    -- Each name part is wrapped in ISNULL because NULL + string is NULL, which
    -- would blank the whole search name when only one part is present.
    ,CASE
        WHEN len(ISNULL(company, '')) > 0
            THEN company
        WHEN len(ISNULL(lname, '')) > 0 OR len(ISNULL(fname, '')) > 0
            THEN ISNULL(lname, '') + ', ' + ISNULL(fname, '')
        ELSE ''
        END
    ,ACCTNUM
FROM TempClassMemberRecords
WHERE (
        isnull(company, '') <> ''
        OR isNull(fname, '') <> ''
        OR isNull(lname, '') <> ''
        )
    AND rawid BETWEEN @Min
        AND @Max
    AND IsProcessed IS NULL
PRINT 'Entities2Actors'
-- Link each entity created in this pass to the actor created from the same
-- staged row; both scratch tables carry that row's id in mid.
INSERT INTO Entities2Actors (
    FKEntityId,
    FKActorId,
    IsActorBeneficiary,
    ModifiedBy,
    ModifiedDate
)
SELECT
    ent.eid,
    act.aid,
    1,
    @ModifiedBy,
    @ModifiedDate
FROM #EntityIds AS ent
INNER JOIN #ActorIds AS act
    ON act.mid = ent.mid
-- etc...
PRINT 'Addressed'
-- Bulk insert the address of every primary actor created in this pass, reading
-- the address fields from the staged row the actor was created from.
INSERT INTO Addresses (
    FKActorId
    ,ModifiedBy
    ,ModifiedDate
    ,Address1
    ,Address2
    ,City
    ,STATE
    ,Zip
    ,Zip4
    ,FKCountryId
    )
SELECT a.aid
    ,@ModifiedBy
    ,@ModifiedDate
    ,IsNull(c.Address, '')
    ,IsNull(c.Address2, '')
    ,IsNull(c.City, '')
    ,IsNull(c.[State], '')
    ,IsNull(c.Zip, '')
    ,IsNull(c.Zip4, '')
    -- @PrimaryCountryId is declared with a fallback of 0, so it is never NULL and
    -- a plain ISNULL could not reach @DefaultCountryId; NULLIF turns the
    -- "not found" value 0 back into NULL so the default country is actually used.
    ,ISNULL(NULLIF(@PrimaryCountryId, 0), @DefaultCountryId)
FROM #ActorIds a
INNER JOIN TempClassMemberRecords c ON a.mid = c.rawId
-- etc...
-- Flag the staged rows of the current window as processed so they are not picked up again.
UPDATE tempClassMemberRecords
SET IsProcessed = 1
WHERE rawid BETWEEN @Min
AND @Max
AND IsProcessed IS NULL
-- Slide the window forward by @RecordsPerloop rows for the next pass.
-- NOTE(review): variables are not rolled back with the transaction, so a failure
-- at COMMIT would leave the window already advanced -- confirm this is acceptable.
SET @Min = @max + 1
SET @max = @max + @RecordsPerloop
COMMIT TRANSACTION
-- Short pause between passes.
WAITFOR DELAY '000:00:00.400'
END TRY
BEGIN CATCH
-- Undo the current window's work; guard the rollback so that an error raised
-- when no transaction is open does not fail again inside the handler.
IF @@TRANCOUNT > 0
    ROLLBACK TRANSACTION
-- Severity 16 reports a real error to the caller; severities of 10 and below
-- are informational messages only.
RAISERROR (N'Error in moving data from Temporary table to Main tables.', -- Message text.
    16, -- Severity.
    1); -- State.
PRINT 'Failed with error: ' + ERROR_MESSAGE()
-- Leave the loop: the window bounds are only advanced at the end of the TRY
-- block, so continuing would retry the same failing window again and again.
BREAK
END CATCH