I have thousands of polygons stored in a table format (given their 4 corner coordinates) which represent small regions of the earth. In addition, each polygon has a data val
I added a couple of things to my initial question, but this is a working solution so far. Do you have any ideas to speed things up? It is still quite slow. As input, I have over 100000 polygons and the meshgrid has 720*1440 grid cells. That is also why I changed the order, because there are a lot of grid cells with no intersecting polygons. Furthermore, when there is only one polygon that intersects with a grid cell, the grid cell receives the whole data value of the polygon. In addition, since I have to store the area fraction and the data value for the "post-processing" part, I set the possible number of intersections to 10.
from shapely.geometry import box, Polygon
import h5py
import numpy as np
f = h5py.File('data.he5','r')
geo = f['geo'][:] #10 columns: 4xlat, lat center, 4xlon, lon center
product = f['product'][:]
f.close()
#prepare the regular meshgrid
delta = 0.25
darea = delta**-2
llx, lly = np.meshgrid( np.arange(-180, 180, delta), np.arange(-90, 90, delta) )
urx, ury = np.meshgrid( np.arange(-179.75, 180.25, delta), np.arange(-89.75, 90.25, delta) )
lly = np.flipud(lly)
ury = np.flipud(ury)
llx = llx.flatten()
lly = lly.flatten()
urx = urx.flatten()
ury = ury.flatten()
#initialize the data structures
data = np.zeros(len(llx),'f2')+np.nan
counter = np.zeros(len(llx),'f2')
fraction = np.zeros( (len(llx),10),'f2')
value = np.zeros( (len(llx),10),'f2')
#go through all polygons
for ii in np.arange(1000):#len(hcho)):
percent = (float(ii)/float(len(hcho)))*100
print("Polygon: %i (%0.3f %%)" % (ii, percent))
xy = [ [geo[ii,5],geo[ii,0]], [geo[ii,7],geo[ii,2]], [geo[ii,8],geo[ii,3]], [geo[ii,6],geo[ii,1]] ]
polygon_shape = Polygon(xy)
# only go through grid cells which might intersect with the polygon
minx = np.min( geo[ii,5:9] )
miny = np.min( geo[ii,:3] )
maxx = np.max( geo[ii,5:9] )
maxy = np.max( geo[ii,:3] )
mask = np.argwhere( (lly>=miny) & (lly<=maxy) & (llx>=minx) & (llx<=maxx) )
if mask.size:
cc = 0
for mm in mask:
cc = int(counter[mm])
pixel_shape = box(llx[mm], lly[mm], urx[mm], ury[mm])
fraction[mm,cc] = polygon_shape.intersection(pixel_shape).area * darea
value[mm,cc] = hcho[ii]
counter[mm] += 1
print("post-processing")
mask = np.argwhere(counter>0)
for mm in mask:
for cc in np.arange(counter[mm]):
maxfraction = np.sum(fraction[mm,:])
value[mm,cc] = (fraction[mm,cc]/maxfraction) * value[mm,cc]
data[mm] = np.mean(value[mm,:int(counter[mm])])
data = data.reshape( 720, 1440 )