我正在研究在给定坐标附近选择最近点的性能。
选项是使用两个decimal(8,6)
纬度、长列或单列geography
并使用它。
我只感兴趣哪个更快?
我正在研究在给定坐标附近选择最近点的性能。
选项是使用两个decimal(8,6)
纬度、长列或单列geography
并使用它。
我只感兴趣哪个更快?
TL;DR Geography 的速度要快约 10 倍。
好的,所以我已经设置了测试:
几个表一个与id,lat,long (int, decimal(8,6),decimal(8,6))
另一个与id,coord (int, geography)
。
然后插入 47k 的随机数据。
对于第一个表的索引,我在 lat,long 上使用了非聚集升序索引,填充因子为 95。第二个表GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW
的填充因子为 95。
-- Benchmark table for the geography approach: an identity key plus one
-- geography value per row.
CREATE TABLE [dbo].[Temp]
(
    [Id]    int       IDENTITY(1, 1) NOT NULL,
    [Coord] geography NOT NULL
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY];
GO
-- Make Id the clustered primary key of dbo.Temp.
ALTER TABLE [dbo].[Temp]
    ADD CONSTRAINT [PK_Temp]
    PRIMARY KEY CLUSTERED ([Id])
    WITH (
        STATISTICS_NORECOMPUTE = OFF,
        IGNORE_DUP_KEY = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY];
GO
-- Populate dbo.Temp with 47,000 random geography points (SRID 4326),
-- one INSERT per loop iteration.
-- Suppress the per-statement "rows affected" message; with 47,000 single-row
-- inserts those messages are pure overhead.
SET NOCOUNT ON;

DECLARE @i int = 0,
        @lat decimal(8,6) = 0.0,
        @long decimal(8,6) = 0.0;

WHILE (@i < 47000)
BEGIN
    -- RAND() is called separately for each coordinate; the expression maps
    -- its result onto roughly -90 .. 90 for both latitude and longitude.
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Explicit column list: the insert no longer depends on the table's
    -- column order or on the identity column being skipped implicitly.
    INSERT INTO dbo.Temp (Coord)
    VALUES (geography::Point(@lat, @long, 4326));

    SET @i = @i + 1;
END

-- Restore the default so later batches in the session report row counts again.
SET NOCOUNT OFF;
GO
-- Spatial (geography grid) index over dbo.Temp.coord.
-- Grid densities per level: LOW / MEDIUM / LOW / LOW, 16 cells per object,
-- fill factor 95. Row locks are disabled for this index; page locks are allowed.
CREATE SPATIAL INDEX [SpatialIndex_1]
ON [dbo].[Temp] ([coord])
USING GEOGRAPHY_GRID
WITH (
    GRIDS = (LEVEL_1 = LOW, LEVEL_2 = MEDIUM, LEVEL_3 = LOW, LEVEL_4 = LOW),
    CELLS_PER_OBJECT = 16,
    PAD_INDEX = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB = OFF,
    DROP_EXISTING = OFF,
    ONLINE = OFF,
    ALLOW_ROW_LOCKS = OFF,
    ALLOW_PAGE_LOCKS = ON,
    FILLFACTOR = 95
)
ON [PRIMARY];
GO
-- Benchmark table for the lat/long approach: an identity key plus two
-- decimal(8, 6) coordinate columns. The clustered primary key on Id is
-- declared inline.
CREATE TABLE [dbo].[Temp2]
(
    [Id]   int           IDENTITY(1, 1) NOT NULL,
    [Lat]  decimal(8, 6) NOT NULL,
    [Long] decimal(8, 6) NOT NULL,
    CONSTRAINT [PK_Temp2] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY];
GO
-- Populate dbo.Temp2 with 47,000 random (Lat, Long) pairs,
-- one INSERT per loop iteration.
-- Suppress the per-statement "rows affected" message; with 47,000 single-row
-- inserts those messages are pure overhead.
SET NOCOUNT ON;

DECLARE @i int = 0,
        @lat decimal(8,6) = 0,
        @long decimal(8,6) = 0;

WHILE (@i < 47000)
BEGIN
    -- RAND() is called separately for each coordinate; the expression maps
    -- its result onto roughly -90 .. 90 for both latitude and longitude.
    SET @lat = (0.9 - (RAND() * 1.8)) * 100;
    SET @long = (0.9 - (RAND() * 1.8)) * 100;

    -- Explicit column list: the insert no longer depends on the table's
    -- column order or on the identity column being skipped implicitly.
    INSERT INTO dbo.Temp2 (Lat, [Long])
    VALUES (@lat, @long);

    SET @i = @i + 1;
END

-- Restore the default so later batches in the session report row counts again.
SET NOCOUNT OFF;
GO
-- Non-clustered ascending index over (Lat, Long) on dbo.Temp2, fill factor 95.
CREATE NONCLUSTERED INDEX [Coord_IX]
ON [dbo].[Temp2] ([Lat] ASC, [Long] ASC)
WITH (
    PAD_INDEX = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB = OFF,
    IGNORE_DUP_KEY = OFF,
    DROP_EXISTING = OFF,
    ONLINE = OFF,
    ALLOW_ROW_LOCKS = ON,
    ALLOW_PAGE_LOCKS = ON,
    FILLFACTOR = 95
)
ON [PRIMARY];
GO
然后我进行了几个测试:
首先是纬度,经度。
-- Benchmark, lat/long variant: 100 iterations, each picking a random query
-- point and returning the 1000 closest rows of dbo.Temp2 by great-circle
-- distance (spherical law of cosines). Prints the elapsed time as hh:mi:ss.
DECLARE @lat decimal(8,6) = 0.0,
        @lon decimal(8,6) = 0.0,
        @i int = 0,
        @start datetime = GETDATE(),
        @lat_s float,
        @lat_c float;

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- Sine/cosine of the query latitude are constant for the whole scan,
    -- so they are computed once per iteration.
    SET @lat_s = SIN(@lat * PI() / 180);
    SET @lat_c = COS(@lat * PI() / 180);

    -- DISTINCT is kept from the original benchmark: it collapses rows whose
    -- (lat, long, dist) are identical.
    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        t.lat,
        t.long,
        t.dist
    FROM (
        SELECT
            p.lat,
            p.long,
            -- Floating-point rounding can push the cosine of the central angle
            -- marginally outside [-1, 1] (e.g. when a stored point coincides
            -- with the query point); ACOS would then raise a domain error and
            -- abort the batch, so the value is clamped first.
            -- NOTE(review): * 60 * 1.1515 presumably converts degrees of arc
            -- to statute miles - confirm the intended unit.
            (ACOS(CASE
                      WHEN c.cos_angle > 1.0 THEN 1.0
                      WHEN c.cos_angle < -1.0 THEN -1.0
                      ELSE c.cos_angle
                  END) * 180 / PI()) * 60 * 1.1515 AS dist
        FROM dbo.Temp2 AS p
        CROSS APPLY (
            SELECT @lat_s * SIN(p.lat * PI() / 180)
                 + @lat_c * COS(p.lat * PI() / 180) * COS((@lon - p.long) * PI() / 180) AS cos_angle
        ) AS c
    ) AS t
    ORDER BY t.dist;

    SET @i = @i + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO
其次是地理。
-- Benchmark, geography variant: 100 iterations, each picking a random query
-- point and returning the 1000 rows of Temp closest to it by STDistance.
-- Prints the elapsed time as hh:mi:ss.
DECLARE @g geography;
DECLARE @i int = 0,
        @lat decimal(8,6) = 0.0,
        @long decimal(8,6) = 0.0,
        @start datetime = GETDATE();

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Build the query point with geography::Point(Lat, Long, SRID), the same
    -- constructor used to load the table. The previous WKT text
    -- 'POINT(<lat> <long>)' put latitude in the longitude slot (WKT order is
    -- longitude first), so the search point did not match the @lat/@long
    -- values reported in the result set.
    SET @g = geography::Point(@lat, @long, 4326);

    SELECT TOP 1000
        @lat,
        @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
        st.[coord],
        st.id
    FROM Temp AS st
    ORDER BY @g.STDistance(st.[coord]) ASC;

    SET @i = @i + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO
结果:
对于那些想知道为什么地理性能如此差的人,这里的执行计划 - 请注意它不使用空间索引,并且由于行大小为 4047 字节(十进制为 25 字节)而需要很长时间才能进行排序。尝试强制索引会导致运行时错误
PS我也为平面做了一个,但与球形的差异非常小~0.5s(在 9.5-10.0 秒内返回,这似乎稍微快了一点)仍然将它全部放在一个地方这里的脚本:
-- Benchmark, planar variant: 100 iterations, each picking a random query
-- point and returning the 1000 rows of dbo.Temp2 with the smallest straight-line
-- distance computed directly on the lat/long values. Prints 'flat' first and the
-- elapsed time (hh:mi:ss) at the end.
PRINT 'flat';

DECLARE @lat decimal(8,6) = 0.0;
DECLARE @lon decimal(8,6) = 0.0;
DECLARE @i int = 0;
DECLARE @start datetime = GETDATE();

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- planar: every row of dbo.Temp2 with its distance to (@lat, @lon).
    WITH planar AS (
        SELECT
            lat,
            long,
            SQRT(POWER((@lat - lat), 2) + (POWER((@lon - long), 2))) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        planar.lat,
        planar.long,
        planar.dist
    FROM planar
    ORDER BY planar.dist;

    SET @i = @i + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO
更新:
切换到 SQL 2014 并强制使用 10M 记录的索引后:
使用的地理脚本:
-- Single nearest-neighbour query against Temp with the spatial index
-- SpatialIndex_1 forced through a table hint.
DECLARE @g geography;
-- @i and @start are declared but not read in the visible part of this script;
-- they are kept in case later statements rely on them.
DECLARE @i int = 0,
        @lat decimal(8,6) = 0.0,
        @long decimal(8,6) = 0.0,
        @start datetime = GETDATE();

SET @lat = (0.9 - RAND() * 1.8) * 100;
SET @long = (0.9 - RAND() * 1.8) * 100;

-- Build the query point with geography::Point(Lat, Long, SRID). The previous
-- WKT text 'POINT(<lat> <long>)' put latitude in the longitude slot (WKT order
-- is longitude first), so the search point did not match the @lat/@long values
-- reported in the result set.
SET @g = geography::Point(@lat, @long, 4326);

SELECT TOP 1000
    @lat,
    @long,
    @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
    st.[coord],
    st.id
FROM Temp AS st WITH (INDEX([SpatialIndex_1]))
-- NOTE(review): coord is NOT NULL, so this predicate filters nothing; it is
-- presumably here so the optimizer can use the spatial index for a
-- nearest-neighbour plan - confirm against the SQL Server documentation.
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) ASC;