0

我在导入带有嵌套关系的 Excel 数据时遇到了性能缓慢的问题。

我有两个要插入的主表和其他四个表,它们与主表具有一对多和多对多的关系。

在为两个主表插入数据之前,我检查了四个表的导入数据是现有的还是新的,因为导入的文件可能具有四个表的相同数据,并且相同的数据不能插入多次。

这就是性能缓慢的原因。

我该如何解决这个问题?

4

2 回答 2

0

使用 .NET 的 File.ReadAllLines() 方法将整个文件读入字符串数组,然后运行 Parallel.For 循环并行处理所有行。

/// <summary>
/// Reads every file in <paramref name="FolderPath"/> that matches
/// <paramref name="FileExtension"/> and passes each line of each file to
/// <c>InsertDataCheck</c>, processing the lines of one file in parallel.
/// </summary>
/// <param name="FolderPath">Directory whose files are processed.</param>
/// <param name="FileExtension">
/// Search pattern handed to <c>DirectoryInfo.GetFiles</c> (for example "*.csv").
/// </param>
/// <returns>
/// <c>true</c> when every matching file was processed without an exception;
/// <c>false</c> when an exception was caught (its message is written to the console).
/// </returns>
private bool ProcessFile(string FolderPath, string FileExtension)
{
    try
    {
        // All files with the requisite file extension.
        DirectoryInfo dinfo = new DirectoryInfo(FolderPath);
        foreach (FileInfo file in dinfo.GetFiles(FileExtension))
        {
            // Load the whole file up front so the lines can be fanned out to workers.
            string[] allLines = File.ReadAllLines(file.FullName);

            Parallel.ForEach(allLines, line => InsertDataCheck(line));
        }
        return true;
    }
    catch (AggregateException ex)
    {
        // Parallel loops wrap worker failures; report the underlying causes
        // instead of the generic aggregate message.
        foreach (Exception inner in ex.Flatten().InnerExceptions)
        {
            Console.WriteLine(inner.Message);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return false;
}

/// <summary>
/// Placeholder for the per-line import step; the body is intentionally empty here.
/// </summary>
/// <param name="Line">One raw line read from an input file.</param>
private void InsertDataCheck(string Line)
{
   // Decide, based on your own condition, whether this line should be inserted
   // (for example, whether the data already exists), and then insert it.
   // NOTE(review): ProcessFile invokes this from a parallel loop, so whatever is
   // implemented here presumably needs to be safe to run concurrently -- confirm.
}
于 2017-07-07T06:11:28.893 回答
0

我曾经不得不处理包含数百万条记录的批量数据导入。以下是我走了许多弯路之后总结的经验:

1) 尽可能不要使用 Excel。它很慢,而且会占用大量内存。一张包含 500,000 条记录的工作表,加载文件时最终可能会占用超过 2 GB 的内存;之后导入一张工作表还需要 30 到 50 分钟甚至更长时间。考虑将数据转换为 CSV 并使用 SqlBulkCopy。它可以在几秒到几分钟内处理大量记录,而不需要几个小时。

2) 在这种情况下,您很难再提高 Entity Framework 的性能。我发现最好、最快的方法是把每张工作表加载到数据库中各自的临时表里,然后编写 SQL 将数据批量插入到各自的最终表中。中间插入的结果可以被捕获到输出表中,这样您就能拿到之后从临时表做连接或向相关表插入数据所需的键。您当然可以“借用”一些 EF 自动生成的 SQL,但之后需要对其进行微调。

3) 尽管 SQL 讨厌循环,但我编写了我的 sql 语句以在循环中运行并一次插入 100,000 条记录。它使插入运行得更快。

为了给您一个大致的思路,在把每张工作表的 CSV 批量导入之后:

首先根据需要定义存储在相关表中的变量和类型:

-- Batch-window state for the import loop.
-- @RecordsPerLoop and @uploadUser are not declared in this fragment; they are
-- expected to come from the enclosing scope (presumably procedure parameters --
-- TODO confirm).
DECLARE @Max INT
    ,@Min INT
    ,@TotalRECORD INT
    ,@Country VARCHAR(50);

-- @Min/@Max bound the rawID range of TempClassMemberRecords handled per iteration.
SET @Max = @RecordsPerLoop;
SET @Min = 0;
SET @TotalRECORD = (
        SELECT COUNT(*)
        FROM TempClassMemberRecords
        );
SET @Country = 'USA';

-- Constants reused by every class-member insert; each is resolved once here
-- rather than once per inserted row.
DECLARE @DefaultCommPreference VARCHAR(50)
    ,@PrimaryActorTypeId INT
    ,@SecondaryActorTypeId INT
    ,@HomePhoneTypeId INT
    ,@WorkPhoneTypeId INT
    ,@PrimaryCountryId INT
    ,@DefaultCountryId INT
    ,@SubmitTypeId INT
    ,@ClaimStatusId INT
    ,@ModifiedBy VARCHAR(20)
    ,@ModifiedDate DATETIME
    ,@CaseCode VARCHAR(50)
    ,@IndividualClaimantType INT
    ,@CompanyClaimantType INT
    ,@Checked BIT
    ,@startingPKId INT;

-- Communication preference copied from the Actors row whose PKID is 0.
SET @DefaultCommPreference = (
        SELECT TOP 1 CommPreference
        FROM Actors
        WHERE PKID = 0
        );

-- Actor type lookups.
SET @PrimaryActorTypeId = (
        SELECT TOP 1 PKId
        FROM ActorTypes
        WHERE ActorTypeName = 'PrimaryClaimant'
        );
SET @SecondaryActorTypeId = (
        SELECT TOP 1 PKId
        FROM ActorTypes
        WHERE ActorTypeName = 'CoClaimant'
        );

-- Phone type lookups.
SET @HomePhoneTypeId = (
        SELECT TOP 1 PKId
        FROM PhoneTypes
        WHERE PhoneTypeName = 'Home'
        );
SET @WorkPhoneTypeId = (
        SELECT TOP 1 PKId
        FROM PhoneTypes
        WHERE PhoneTypeName = 'Work'
        );

-- Country lookups; both fall back to 0 when no matching row exists.
SET @PrimaryCountryId = IsNull((
            SELECT TOP 1 PKId
            FROM Countries
            WHERE @Country IN (
                    CountryName
                    ,CountryCode
                    )
            ), 0);
SET @DefaultCountryId = IsNull((
            SELECT TOP 1 PKId
            FROM Countries
            WHERE CountryCode = 'USA'
            ), 0);

-- Claim submit/status lookups.
SET @SubmitTypeId = (
        SELECT TOP 1 PKId
        FROM ClaimSubmitTypes
        WHERE SubmitTypeName = 'Bulk'
        );
SET @ClaimStatusId = (
        SELECT TOP 1 PKId
        FROM ClaimStatusTypes
        WHERE StatusName = 'Active'
        );

-- Audit values stamped on every inserted row.
SET @ModifiedBy = @uploadUser;
SET @ModifiedDate = GETDATE();

-- Case code of the row in Cases with the highest PKId.
SET @CaseCode = (
        SELECT TOP 1 CaseCode
        FROM Cases
        ORDER BY PKId DESC
        ) + '';

-- Claimant type lookups.
SET @IndividualClaimantType = (
        SELECT TOP 1 PKId
        FROM claimanttypes
        WHERE ClaimantTypeName = 'Individual'
        );
SET @CompanyClaimantType = (
        SELECT TOP 1 PKId
        FROM claimanttypes
        WHERE ClaimantTypeName = 'Corporation'
        );

SET @Checked = 0;

-- One past the current highest Entities.PKId, read WITH (NOLOCK); NULL when the
-- table has no rows.
SET @startingPKId = (
        SELECT MAX(PKId) + 1
        FROM dbo.Entities WITH (NOLOCK)
        );

-- When the staging table holds no more than one batch, shrink the first window
-- to the total row count.
IF (@TotalRECORD <= @RecordsPerLoop)
    SET @Max = @TotalRECORD;

运行你的插入循环:

-- Main batch loop.
-- Moves staged rows from TempClassMemberRecords into the final tables one rawID
-- window (@Min..@Max) at a time, with one transaction per window. On the first
-- failing batch the transaction is rolled back, an error is raised to the caller
-- and the loop stops; rows from earlier, committed batches stay in place and are
-- marked IsProcessed = 1, so a rerun skips them.
WHILE (@Min <= @TotalRECORD)
BEGIN
    -- Per-batch scratch tables: drop leftovers from the previous iteration.
    IF OBJECT_ID('tempdb..#EntityIds') IS NOT NULL
        DROP TABLE #EntityIds

    IF OBJECT_ID('tempdb..#RefNumRepository') IS NOT NULL
        DROP TABLE #RefNumRepository

    IF OBJECT_ID('tempdb..#ActorIds') IS NOT NULL
        DROP TABLE #ActorIds

    IF OBJECT_ID('tempdb..#SecondaryActorIds') IS NOT NULL
        DROP TABLE #SecondaryActorIds

    -- mid = staging rawID, eid = generated Entities.pkid
    CREATE TABLE #EntityIds (
        pkid INT identity(1, 1) NOT NULL
        ,mid INT
        ,eid INT
        )

    -- mid = staging rawID, aid = generated Actors.pkid
    CREATE TABLE #ActorIds (
        pkid INT identity(1, 1) NOT NULL
        ,mid INT
        ,aid INT
        )

    CREATE TABLE #SecondaryActorIds (
        pkid INT identity(1, 1) NOT NULL
        ,mid INT
        ,aid INT
        )

    CREATE TABLE #RefNumRepository (
        pkid INT identity(1, 1) NOT NULL
        ,RefNum VARCHAR(50)
        )

    BEGIN TRANSACTION

    BEGIN TRY
        -- Reserve a block of unused reference numbers for this batch and keep
        -- the reserved values in #RefNumRepository.
        UPDATE TOP (@RecordsPerLoop + 1) RefNumRepository
        SET IsUsed = 1
        OUTPUT deleted.RefNum
        INTO #RefNumRepository(RefNum)
        WHERE IsUsed = 0;

        PRINT 'Entities'
        -- Source receives the staging rawID, so OUTPUT can map every generated
        -- Entities.pkid back to its staging row.
        INSERT INTO Entities (
            ModifiedBy
            ,ModifiedDate
            ,RecordOwnerName
            ,IsConflictOfInterest
            ,FKClaimantTypeId
            ,OtherClaimantType
            ,InstitutionAccountNumber
            ,RefNum
            ,FKSubmitTypeId
            ,FKClaimStatusTypeId
            ,RecordType
            ,ClaimNum
            ,FilingDate
            ,FirstName
            ,Lastname
            ,Email
            ,SSN
            ,Source
            ,ClaimDataCertifiedDate
            )
        OUTPUT Inserted.pkid
            ,Inserted.source
        INTO #EntityIds(eid, mid)
        SELECT @ModifiedBy
            ,@ModifiedDate
            ,NULL
            ,1
            ,CASE 
                WHEN IsNull(company, '') = ''
                    THEN @IndividualClaimantType
                ELSE @CompanyClaimantType
                END
            ,NULL
            ,NULL
            ,''
            ,@SubmitTypeId
            ,@ClaimStatusId
            ,'CM'
            ,NULL
            ,@ModifiedDate
            ,IsNull(fname, '')
            ,IsNull(lname, '')
            ,IsNull(Email, '')
            ,IsNull(ssn, '')
            ,rawID
            ,@ModifiedDate
        FROM TempClassMemberRecords
        WHERE rawID BETWEEN @Min
                AND @Max
            AND IsProcessed IS NULL

        EXEC dbo.[USP_AssignClassMemberRefNums] @startingPKId

        PRINT 'Actors'
        -- Bulk insert this window of class members into Actors. The staging rawID
        -- is written to the attention column so OUTPUT can pair each generated
        -- Actors.pkid with its staging row.
        INSERT INTO Actors (
            FKActorTypeId
            ,ModifiedBy
            ,ModifiedDate
            ,LastName
            ,FirstName
            ,MiddleName
            ,CommPreference
            ,IsPayee
            ,IsUSCitizen
            ,ein
            ,ssn
            ,company
            ,attention
            ,NotificationsBlocked
            ,SearchName
            ,ClientAcctNumber
            )
        OUTPUT Inserted.pkid
            ,inserted.attention
        INTO #ActorIds(aid, mid)
        SELECT @PrimaryActorTypeId
            ,@ModifiedBy
            ,@ModifiedDate
            ,IsNull(lname, '')
            ,IsNull(fname, '')
            ,''
            ,IsNull(@DefaultCommPreference, 'Mail')
            ,1
            ,NULL
            ,IsNull(ein, '')
            ,IsNull(ssn, '')
            ,IsNull(company, '')
            ,rawid
            ,0
            -- SearchName: company when present, otherwise 'last, first'. Each name
            -- part is NULL-guarded because string + NULL yields NULL, which would
            -- blank the whole value when only one part is missing.
            ,CASE
                WHEN len(ISNULL(company, '')) > 0
                    THEN company
                WHEN len(ISNULL(lname, '')) > 0
                    OR len(ISNULL(fname, '')) > 0
                    THEN ISNULL(lname, '') + ', ' + ISNULL(fname, '')
                ELSE ''
                END
            ,ACCTNUM
        FROM TempClassMemberRecords
        WHERE (
                isnull(company, '') <> ''
                OR isNull(fname, '') <> ''
                OR isNull(lname, '') <> ''
                )
            AND rawid BETWEEN @Min
                AND @Max
            AND IsProcessed IS NULL

        PRINT 'Entities2Actors'
        -- Bulk insert the relations of Entities to Actors, joined on the shared
        -- staging rawID (mid).
        INSERT INTO Entities2Actors (
            FKEntityId
            ,FKActorId
            ,IsActorBeneficiary
            ,ModifiedBy
            ,ModifiedDate
            )
        SELECT e.eid
            ,a.aid
            ,1
            ,@ModifiedBy
            ,@ModifiedDate
        FROM #EntityIds e
        INNER JOIN #ActorIds a ON e.mid = a.mid

        -- etc...
        PRINT 'Addressed'
        -- Bulk insert into Addresses for the primary actor address.
        -- NOTE(review): @PrimaryCountryId is initialised with IsNull(..., 0) where
        -- it is declared, so the @DefaultCountryId fallback below only applies if
        -- that initialisation changes -- confirm which behaviour is intended.
        INSERT INTO Addresses (
            FKActorId
            ,ModifiedBy
            ,ModifiedDate
            ,Address1
            ,Address2
            ,City
            ,STATE
            ,Zip
            ,Zip4
            ,FKCountryId
            )
        SELECT a.aid
            ,@ModifiedBy
            ,@ModifiedDate
            ,IsNull(Address, '')
            ,IsNull(Address2, '')
            ,IsNull(City, '')
            ,IsNull([State], '')
            ,IsNull(Zip, '')
            ,IsNull(Zip4, '')
            ,ISNULL(@PrimaryCountryId, @DefaultCountryId)
        FROM #ActorIds a
        INNER JOIN TempClassMemberRecords c ON a.mid = c.rawId

        -- etc...
        -- Mark this window as done so it is never imported twice.
        UPDATE tempClassMemberRecords
        SET IsProcessed = 1
        WHERE rawid BETWEEN @Min
                AND @Max
            AND IsProcessed IS NULL

        -- Advance the window to the next batch.
        SET @Min = @Max + 1
        SET @Max = @Max + @RecordsPerLoop

        COMMIT TRANSACTION

        -- Short pause between batches, taken after the commit.
        WAITFOR DELAY '000:00:00.400'
    END TRY

    BEGIN CATCH
        -- Only roll back if the transaction is still open.
        IF @@TRANCOUNT > 0
            ROLLBACK TRANSACTION

        -- Severity 16 so the caller actually receives an error; severities up to
        -- 10 are informational messages only.
        RAISERROR (N'Error in moving data from Temporary table to Main tables.', -- Message text.
           16,
           1);

        PRINT 'Failed with error: ' + ERROR_MESSAGE()

        -- @Min/@Max were not advanced for the failed batch, so continuing would
        -- retry the same window indefinitely.
        BREAK
    END CATCH
END
于 2017-07-07T03:07:34.437 回答