SQL Geometry VS decimal(8,6) Lat, Long Performance

笑着哭i 提交于 2019-12-05 15:36:25
Matas Vaitkevicius

TL;DR Geography is ~10 times faster.

Ok so I have set up test:

Couple of tables one with id,lat,long (int, decimal(8,6),decimal(8,6)) other with id,coord (int, geography).

Then insert 47k of random data.

For indexing first table I used nonclustered Ascending index on lat,long with fill factor of 95. for second one GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW with fill factor 95.

CREATE TABLE dbo.Temp
(
Id int NOT NULL IDENTITY (1, 1),
Coord geography NOT NULL
)  ON [PRIMARY]
 TEXTIMAGE_ON [PRIMARY]
GO
ALTER TABLE dbo.Temp ADD CONSTRAINT
    PK_Temp PRIMARY KEY CLUSTERED 
    (
    Id
    ) WITH( STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]

GO


declare @i int =0
    declare @lat decimal(8,6) =0.0
      declare @long decimal(8,6) =0.0
  while (@i < 47000)
  begin
  set @lat =(select (0.9 -Rand()*1.8)*100)
 set @long =(select (0.9 -Rand()*1.8)*100)
    insert into Temp
  select geography::Point(@lat, @long,4326)


set @i =@i+1

 end

go


CREATE SPATIAL INDEX [SpatialIndex_1] ON [dbo].Temp
(
    [coord]
)USING  GEOGRAPHY_GRID 
WITH (GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW), 
CELLS_PER_OBJECT = 16, PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = OFF, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 95) ON [PRIMARY]

GO

CREATE TABLE [dbo].[Temp2](
    [Id] [int] IDENTITY(1,1) NOT NULL,
    [Lat] [decimal](8, 6) NOT NULL,
    [Long] [decimal](8, 6) NOT NULL,
 CONSTRAINT [PK_Temp2] PRIMARY KEY CLUSTERED 
(
    [Id] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]

GO


declare @i int = 0
declare @lat decimal(8,6)  = 0 
declare @long decimal(8,6)  = 0

while (@i < 47000)
begin
set @lat = (select (0.9 - (RAND()*1.8))*100)
set @long = (select (0.9 - (RAND()*1.8))*100)

insert into Temp2
select @lat , @long

set @i = @i +1
end

go
CREATE NONCLUSTERED INDEX [Coord_IX] ON [dbo].[Temp2] 
(
    [Lat] ASC,
    [Long] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON, FILLFACTOR = 95) ON [PRIMARY]
GO

Then I ran couple of tests:

First is for Lat,Long.

declare @lat decimal(8,6) = 0.0,
 @lon decimal(8,6) = 0.0,
@i int = 0,
@start datetime = getdate()

while(@i < 100)
begin

set @lat =   (select (0.9 - Rand()*1.8)*100)
set @lon =  (select (0.9 - (RAND()*1.8))*100.0)

DECLARE @lat_s FLOAT = SIN(@lat * PI() / 180),
        @lat_c FLOAT = COS(@lat * PI() / 180)


SELECT DISTINCT top 1000 @lat, @lon, *
FROM (
    SELECT
        lat,
        long,
        ((ACOS(@lat_s * SIN(lat * PI() / 180) + @lat_c * COS(lat * PI() / 180) * COS((@lon - long) * PI() / 180)) * 180 / PI()) * 60 * 1.1515) AS dist
    FROM dbo.Temp2
) t
ORDER BY dist

set @i= @i+1
end
print CONVERT(varchar,(getdate()-@start),108)
go

Second is for geography.

 DECLARE @g geography;


   declare @point nvarchar(50)  =''
 declare @i int =0,
     @lat decimal(8,6) =0.0,
       @long decimal(8,6) =0.0,
       @start datetime = getdate()
  while (@i < 100)
  begin
  set @lat =(select (0.9 -Rand()*1.8)*100)
 set @long =(select (0.9 -Rand()*1.8)*100)
 set @point = (select 'POINT('+CONVERT(varchar(10), @lat)+ '  ' +CONVERT(varchar(10), @long)+')')
 SET @g = geography::STGeomFromText(@point, 4326);
    SELECT TOP 1000
    @lat,
    @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)] 
    ,   st.[coord]
    ,   st.id
FROM    Temp st 
ORDER BY @g.STDistance(st.[coord]) ASC

set @i =@i+1

 end
print CONVERT(varchar,(getdate()-@start),108)
 go

Results:

  • Lat,Long - 00:00:10
  • Geography - 00:02:21

For those who are wondering why geography has such poor performance here's execution plan - notice that it does not use spatial index, and takes ages to sort since row size is 4047 Bytes (against 25 Bytes in decimal). Trying to force index results in runtime error

P.S I also did one for flat surface but difference from spherical one is very small ~0.5s (comes back in 9.5-10.0 seconds which does seem to be slightly faster) still to have it all in one place here's script:

print 'flat'
declare @lat decimal(8,6) = 0.0,
 @lon decimal(8,6) = 0.0,
@i int = 0,
@start datetime = getdate()

while(@i < 100)
begin

set @lat =   (select (0.9 - Rand()*1.8)*100)
set @lon =  (select (0.9 - (RAND()*1.8))*100.0)

SELECT DISTINCT top 1000 @lat, @lon, *
FROM (
    SELECT
        lat,
        long,
        sqrt(power((@lat - lat),2) + (power((@lon - long),2))) AS dist
    FROM dbo.Temp2
) t

ORDER BY dist

set @i= @i+1
end
print CONVERT(varchar,(getdate()-@start),108)
go

UPDATE:

After switching to SQL 2014 and forcing to use the index with 10M records:

  • Lat,Long took 00:00:22.935
  • Flat took 00:00:22.988
  • Geography took 00:00:02.427

geography script used:

DECLARE @g geography;
declare @point nvarchar(50)  =''
declare @i int =0,
        @lat decimal(8,6) =0.0,
        @long decimal(8,6) =0.0,
        @start datetime = getdate()
set @lat =(select (0.9 -Rand()*1.8)*100)
set @long =(select (0.9 -Rand()*1.8)*100)
set @point = (select 'POINT('+CONVERT(varchar(10), @lat)+ '  ' 
             +CONVERT(varchar(10), @long)+')')
SET @g = geography::STGeomFromText(@point, 4326);

SELECT TOP 1000
    @lat,
    @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)] 
    ,   st.[coord]
    ,   st.id
FROM    Temp st with(index([SpatialIndex_1]))
WHERE @g.STDistance(st.[coord])  IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) asc
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!