How do I match similar coordinates using Python?

北城余情 提交于 2019-12-05 07:42:20

For the amount of data you have, you can calculate a distance metric between each pair of points. Something like:

def close_enough(p1, p2, threshold=0.01):
    """Return True when two points lie within a squared-distance threshold.

    Args:
        p1: Object exposing numeric ``RA`` and ``Dec`` attributes.
        p2: Object exposing numeric ``RA`` and ``Dec`` attributes.
        threshold: Exclusive upper bound on the squared coordinate
            distance ``dRA**2 + dDec**2``.

    Returns:
        bool: True if the squared distance is strictly below ``threshold``.
    """
    # You may need to scale the RA difference with dec.
    return (p1.RA - p2.RA)**2 + (p1.Dec - p2.Dec)**2 < threshold

# Brute-force pass: examine every unordered pair of points exactly once
# and keep the pairs that close_enough() accepts.
candidates = [
    pair
    for pair in itertools.combinations(points, 2)
    if close_enough(*pair)
]

For a large data set you may want to use a line sweep algorithm to only calculate the metric for points that are in the same neighborhood. Like this:

import itertools as it
import operator as op
import sortedcontainers     # handy library on Pypi
import time

from collections import namedtuple
from math import cos, degrees, pi, radians, sqrt
from random import sample, uniform

# Record for a single observation.  Field order matters: tuples compare
# field by field, so sorting Observations orders them by dec, then ra.
Observation = namedtuple("Observation", ["dec", "ra", "other"])

Generate some test data:

# Size of the synthetic field.
number_of_observations = 5000

# Random observations; arguments are given in Observation field order.
# (random.uniform accepts its two bounds in either order.)
field1 = [
    Observation(
        uniform(-25.0, -35.0),   # dec
        uniform(45.0, 55.0),     # ra
        uniform(0, 10),          # other data
    )
    for _ in range(number_of_observations)
]

# Near duplicates: copy a random subset of field1, nudging each
# coordinate by at most 0.0001 and keeping the 'other' payload unchanged.
number_of_dups = 1000
dups = [
    Observation(
        picked.dec + uniform(-0.0001, 0.0001),
        picked.ra + uniform(-0.0001, 0.0001),
        picked.other,
    )
    for picked in sample(field1, number_of_dups)
]

# Full data set: the original field followed by the near duplicates.
data = field1 + dups

Here's the algorithm:

# Line sweep over observations sorted by declination.  A window holding only
# the observations within dec_span south of the current one is kept ordered
# by right ascension, so each observation is compared only against its
# neighbours instead of against the whole data set.

# Note: dec is first in Observation, so data is sorted by .dec then .ra.
data.sort()

# Parameter that determines the size of a sliding declination window
# and therefore how close two observations need to be to be considered
# observations of the same object.
dec_span = 0.0001

# Result. A list of observation pairs close enough to be considered
# observations of the same object.
candidates = []

# Sliding declination window.  Within the window, observations are
# ordered by .ra.
window = sortedcontainers.SortedListWithKey(key=op.attrgetter('ra'))

# lag_obs is the 'southernmost' observation within the sliding declination
# window.  It comes from its own iterator over the same sorted data, so it
# trails lead_obs and stops advancing once it reaches it.  The default
# keeps an empty data set from raising StopIteration here.
observation = iter(data)
lag_obs = next(observation, None)

# lead_obs is the 'northernmost' observation in the sliding declination window.
for lead_obs in data:

    # Dec of lead_obs represents the leading edge of window.
    window.add(lead_obs)

    # Remove observations further than the trailing edge of window.
    while lead_obs.dec - lag_obs.dec > dec_span:
        window.discard(lag_obs)
        lag_obs = next(observation)

    # Calculate 'east-west' width of the window at dec of lead_obs.
    # Dividing by cos(dec) widens the RA range as dec moves away from 0.
    # NOTE(review): no wrap-around handling is done for RA values near the
    # ends of the RA range -- confirm whether the data can straddle it.
    ra_span = dec_span / cos(radians(lead_obs.dec))
    east_ra = lead_obs.ra + ra_span
    west_ra = lead_obs.ra - ra_span

    # Check all observations in the sliding window within
    # ra_span of lead_obs.
    for other_obs in window.irange_key(west_ra, east_ra):

        # Skip only lead_obs itself.  An identity test is used so that a
        # distinct observation with exactly equal field values is still
        # reported as a candidate pair.
        if other_obs is not lead_obs:
            # lead_obs is at the top center of a box 2 * ra_span wide by
            # 1 * dec_span tall.  other_obs is in that box. If desired,
            # put additional fine-grained 'closeness' tests here.
            # For example:
            #    average_dec = (other_obs.dec + lead_obs.dec) / 2
            #    delta_dec = other_obs.dec - lead_obs.dec
            #    delta_ra  = (other_obs.ra - lead_obs.ra) * cos(radians(average_dec))
            # e.g. if delta_dec**2 + delta_ra**2 < threshold:
            candidates.append((lead_obs, other_obs))

On my laptop, it finds the close points in less than a tenth of a second.

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!