1

我写了一个查询,用来向 SQL Server 数据库中生成一些测试数据,但生成 1 GB 数据大约需要 45 分钟。如何提高数据生成的性能?

-- Test-data generator: inserts 3,000,000 rows into Table_1, one INSERT statement per loop iteration.
-- Each row gets a zero-padded 10-character counter (q) and five consecutive dates (w..y).
-- @RowCount   : loop counter, also the source of the q value
-- @RowString  : text form of @RowCount
-- @Random     : random day offset computed from @Lower/@Upper
-- @InsertDate : GETDATE() shifted by @Random days; base date for the row
DECLARE @RowCount INT
DECLARE @RowString VARCHAR(10)
DECLARE @Random INT
DECLARE @Upper INT
DECLARE @Lower INT
DECLARE @InsertDate DATETIME

-- Bounds, in days relative to today, fed into the random offset formula below.
SET @Lower = -730
SET @Upper = -1
SET @RowCount = 0

WHILE @RowCount < 3000000
BEGIN
 SET @RowString = CAST(@RowCount AS VARCHAR(10))
 -- Scale RAND() by (@Upper - @Lower - 1), shift by @Lower, then round to a whole number of days.
 SELECT @Random = ROUND(((@Upper - @Lower -1) * RAND() + @Lower), 0)
 SET @InsertDate = DATEADD(dd, @Random, GETDATE())

 -- q: counter left-padded with '0' to 10 characters; w: base date; e..y: base date + 1..4 days.
 INSERT INTO Table_1
  (q
  ,w
  ,e
  ,r
  ,t
  ,y)
 VALUES
  (REPLICATE('0', 10 - DATALENGTH(@RowString)) + @RowString
  , @InsertDate
  ,DATEADD(dd, 1, @InsertDate)
  ,DATEADD(dd, 2, @InsertDate)
  ,DATEADD(dd, 3, @InsertDate)
  ,DATEADD(dd, 4, @InsertDate))

 SET @RowCount = @RowCount + 1
END
4

3 回答 3

2

问题在于您每次只生成并插入一行数据,而 SQL Server 并不是为这种工作方式设计的。您需要改用基于集合的解决方案。下面的做法在我这里 30 秒内就能完成:

-- Set-based staging of 3,000,000 generated rows: one INSERT ... SELECT instead of a row-by-row loop.
-- #Table_1 holds a sequential Id, its zero-padded text form, and one random base date per row.
CREATE TABLE #Table_1 (
    -- Seeded at 0 so the generated keys are 0 .. 2,999,999, matching a loop that counts from 0.
    Id INT IDENTITY(0,1)
    -- Id left-padded with '0' to exactly 10 characters (explicit VARCHAR length, result is 10 chars wide).
    , RowString AS RIGHT('0000000000' + CAST(Id AS VARCHAR(10)), 10)
    , Date1 DATETIME
);

-- Bounds, in days relative to today, fed into the random offset formula below.
DECLARE @Upper INT = -1;
DECLARE @Lower INT = -730;

-- RAND(CHECKSUM(NEWID())) reseeds RAND for every row; an unseeded RAND() would be
-- evaluated once for the whole statement and give every row the same date.
INSERT INTO #Table_1 (Date1)
SELECT  TOP (3000000)
        DATEADD(dd, ROUND(((@Upper - @Lower - 1) * RAND(CHECKSUM(NEWID())) + @Lower), 0), GETDATE())
FROM    (   SELECT  number
            FROM    master..spt_values
            WHERE   [type] = 'P' AND number <= 2000
        ) AS a (Number)
        -- Explicit CROSS JOIN: the two number lists are multiplied only to produce enough rows.
        -- NOTE(review): presumably about 2,001 rows per side (~4 million combinations) - verify
        -- that spt_values type 'P' covers 0..2000 on the target server.
        CROSS JOIN
        (   SELECT  number
            FROM    master..spt_values
            WHERE   [type] = 'P' AND number <= 2000
        ) AS b (Number);

在 #Table_1 临时表中生成上述数据之后,再把它们插入 Table_1 就很简单了:

-- Copy the staged rows into the target table in a single statement.
-- w is the staged base date; e, r, t, y are that date plus 1, 2, 3 and 4 days.
INSERT INTO Table_1 (q, w, e, r, t, y)
SELECT
    staged.RowString,
    staged.Date1,
    DATEADD(dd, 1, staged.Date1),
    DATEADD(dd, 2, staged.Date1),
    DATEADD(dd, 3, staged.Date1),
    DATEADD(dd, 4, staged.Date1)
FROM #Table_1 AS staged;
于 2013-08-28T14:23:00.173 回答
2

您也可以尝试以下操作:

-- Single-statement generator: builds 3,000,000 sequence numbers in a CTE and inserts
-- one row per number straight into Table_1.

-- Bounds, in days relative to today, fed into the random offset formula below.
-- Declared here so the snippet runs on its own; drop these two lines if the batch
-- already declares @Upper and @Lower.
DECLARE @Upper INT = -1;
DECLARE @Lower INT = -730;

-- seq: numbers 0 .. 2,999,999. The catalog view is cross-joined with itself purely as a
-- row source; ORDER BY @@SPID is a constant, so ROW_NUMBER() just numbers rows as they arrive.
WITH seq AS (
    SELECT TOP (3000000)
        ROW_NUMBER() OVER (ORDER BY @@SPID) - 1 AS N
    FROM sys.all_columns AS c1
    CROSS JOIN sys.all_columns AS c2
)
INSERT INTO Table_1 (q, w, e, r, t, y)
SELECT
    -- N left-padded with '0' to exactly 10 characters.
    RIGHT('0000000000' + CAST(seq.N AS VARCHAR(10)), 10)
    ,p.InsertDate
    ,DATEADD(dd, 1, p.InsertDate)
    ,DATEADD(dd, 2, p.InsertDate)
    ,DATEADD(dd, 3, p.InsertDate)
    ,DATEADD(dd, 4, p.InsertDate)
FROM seq
-- CROSS APPLY computes the random base date once per row so all five date columns share it.
-- RAND(CHECKSUM(NEWID())) reseeds RAND for every row.
CROSS APPLY (
    SELECT DATEADD(dd, ROUND(((@Upper - @Lower - 1) * RAND(CHECKSUM(NEWID())) + @Lower), 0), GETDATE())
) AS p (InsertDate);
于 2013-08-28T14:28:26.543 回答
0

您在 SQL Server 中遇到的最大瓶颈与日志记录有关,因为每条 INSERT 语句都会在事务日志中生成一条日志记录。

尽管表变量通常只用于中小规模的数据量,但我认为在这里使用它们会有好处,因为它们不参与事务、日志记录或锁定……

这是适当的示例代码:

-- Sketch: stage the generated rows in a table variable inside the loop, then copy them
-- to Table_1 with one set-based INSERT. The "..." lines stand for the variable
-- declarations, per-row value computation and counter increment, which are elided here.

-- First declare the table variable (same column layout as the INSERT column lists below).
DECLARE @TempTable TABLE
(
    q VARCHAR(10),
    w DATETIME,
    e DATETIME,
    r DATETIME,
    t DATETIME,
    y DATETIME
)

...

WHILE @RowCount < 3000000
BEGIN
    ...

    -- Insert each generated row into the table variable.
    -- NOTE(review): table-variable changes are not undone by ROLLBACK, but they are
    -- presumably still logged in tempdb - confirm before relying on "no logging" here.
    INSERT INTO @TempTable
    (q
    ,w
    ,e
    ,r
    ,t
    ,y)
    VALUES
    (REPLICATE('0', 10 - DATALENGTH(@RowString)) + @RowString
    , @InsertDate
    ,DATEADD(dd, 1, @InsertDate)
    ,DATEADD(dd, 2, @InsertDate)
    ,DATEADD(dd, 3, @InsertDate)
    ,DATEADD(dd, 4, @InsertDate))

    ...
END

-- Copy all staged rows in a single INSERT ... SELECT.
-- Columns are listed explicitly on both sides so the copy does not depend on Table_1's
-- column order or on it having exactly these six columns.
-- NOTE(review): whether TABLOCK makes this minimally logged depends on the recovery model
-- and on the target table (heap vs. indexed, empty vs. populated) - verify for your setup.
INSERT INTO Table_1 WITH (TABLOCK)
    (q, w, e, r, t, y)
SELECT q, w, e, r, t, y
FROM @TempTable
于 2013-08-28T14:55:42.083 回答