IRuby.html '' require 'rgeo' require 'rgeo-geojson' require 'dbscan' require 'nyaplot' Nyaplot.init_iruby geomstr = '{"type":"FeatureCollection","features":[{"type":"Feature","properties":{"user_id":638},"geometry":{"type":"Polygon","coordinates":[[[-73.98620970547199,40.7356342514617],[-73.98627072572708,40.735547874977094],[-73.98632504045963,40.73557226364293],[-73.98622445762157,40.73570995781772],[-73.9861835539341,40.73569268254945],[-73.98621775209902,40.735640856717666]]]}},{"type":"Feature","properties":{"user_id":666},"geometry":{"type":"Polygon","coordinates":[[[-73.98620769381522,40.73563526765495],[-73.9862660318613,40.735547874977094],[-73.98632504045963,40.735570739351566],[-73.98622579872608,40.73570944972167],[-73.98618154227734,40.73569217445325],[-73.98621775209902,40.73563933242788]]]}},{"type":"Feature","properties":{"session_id":"79e7ee062a9e0333926e3e1fdc3e92db"},"geometry":{"type":"Polygon","coordinates":[[[-73.98632369935513,40.735570739351566],[-73.98622512817383,40.73570944972167],[-73.98618154227734,40.73569014206842],[-73.98621909320354,40.735640856717666],[-73.98620970547199,40.73563526765495],[-73.98627005517483,40.73554889117169]]]}},{"type":"Feature","properties":{"session_id":"3d3003b26bb6b2f3b9577924b9ed5e0e"},"geometry":{"type":"Polygon","coordinates":[[[-73.98621842265129,40.7356423810074],[-73.98620903491974,40.73563577575159],[-73.98627139627934,40.735547874977094],[-73.98632436990738,40.735571755545806],[-73.98622579872608,40.73570995781772],[-73.98618087172508,40.735689633972214]]]}},{"type":"Feature","properties":{"user_id":596},"geometry":{"type":"Polygon","coordinates":[[[-73.98626938462257,40.73554889117167],[-73.98632369935513,40.735572771740024],[-73.98622445762157,40.73570894162559],[-73.98618154227734,40.73569065016463],[-73.98621775209902,40.735640856717666],[-73.98620836436749,40.735634251461676]]]}},{"type":"Feature","properties":{"session_id":"0afaf74383ce51aceba02fc49ce5a9e3"},"geometry":{"type":"Polygon","coordinates":[[[-73.98621775209902,40.73563984052446],[-73.98620836436749,40.73563272717173],[-73.98626938462257,40.735550415463514],[-73.98632235825062,40.73557124744871],[-73.98622360456956,40.73570641325812],[-73.98618768252459,40.73568957578454]]]}},{"type":"Feature","properties":{"user_id":538},"geometry":{"type":"Polygon","coordinates":[[[-73.98632571101189,40.735571755545806],[-73.98622378706932,40.73570995781772],[-73.98618288338184,40.73569268254945],[-73.98621775209902,40.73564034862108],[-73.9862110465765,40.7356362838482],[-73.98627005517483,40.735550923560815]]]}},{"type":"Feature","properties":{"user_id":580},"geometry":{"type":"Polygon","coordinates":[[[-73.98632436990738,40.73557124744871],[-73.98626066744328,40.7356581319994],[-73.98625999689102,40.7356581319994],[-73.98620903491974,40.735634759558316],[-73.98626804351805,40.735547874977094]]]}},{"type":"Feature","properties":{"user_id":580},"geometry":{"type":"Polygon","coordinates":[[[-73.98626133799553,40.7356581319994],[-73.98622579872608,40.73570944972167],[-73.98618154227734,40.73569166635704],[-73.98621842265129,40.73563984052446]]]}},{"type":"Feature","properties":{"user_id":548},"geometry":{"type":"Polygon","coordinates":[[[-73.98620970547199,40.73563475955834],[-73.98627005517483,40.73554990736624],[-73.98632369935513,40.735571755545806],[-73.98622360456956,40.73570641325812],[-73.9861848950386,40.735689633972214],[-73.98621842265129,40.735640856717666]]]}},{"type":"Feature","properties":{"session_id":"53056025663f6d6564a39975971cb87c"},"geometry":{"type":"Polygon","coordinates":[[[-73.98621909320354,40.735638316234656],[-73.98620836436749,40.7356362838482],[-73.98620769381522,40.73563577575159],[-73.98627005517483,40.73554939926897],[-73.98632302880287,40.73557023125444],[-73.98622360456956,40.73570641325812],[-73.98617953062057,40.735689633972214]]]}}]}' geocollection = RGeo::GeoJSON.decode(geomstr, :json_parser => :json) def parse_geojson(json) RGeo::GeoJSON.decode(json, :json_parser => :json) end geocollection.first.geometry def get_centroid(poly_feature) return if (poly_feature.geometry.geometry_type.type_name != "Polygon") c = poly_feature.geometry.centroid return [c.x, c.y] end centroid = get_centroid(geocollection.first) def get_all_centroids(geom) centroids = {} geom.each_with_index do |poly,index| next if (poly.geometry.geometry_type.type_name != "Polygon") centroids[index] = get_centroid(poly) end return centroids end centroids = get_all_centroids(geocollection) plot = Nyaplot::Plot.new plot.width(400) plot.height(400) plot.zoom(true) plot.rotate_x_label(-60) points_x = centroids.map { |p| p[1][0] } points_y = centroids.map { |p| p[1][1] } df = Nyaplot::DataFrame.new({x:points_x,y:points_y}) # add some padding xmin = points_x.min - 1e-5 xmax = points_x.max + 1e-5 ymin = points_y.min - 1e-5 ymax = points_y.max + 1e-5 plot.xrange([xmin,xmax]) plot.yrange([ymin,ymax]) # end padding sc = plot.add_with_df(df, :scatter, :x, :y) plot.show dists = [] done = {} centroids.each_with_index do |cc1,i| centroids.each_with_index do |cc2,j| c1 = cc1[1] c2 = cc2[1] dists.push({:dist=>Math.hypot(c1[0]-c2[0],c1[1]-c2[1]),:from=>i,:to=>j,:from_centroid=>c1,:to_centroid=>c2}) if (c1 != c2 && !done[[c2,c1]]) done[[c1,c2]] = true end end dists = dists.sort_by!{|k| k[:dist]} dist_df = Nyaplot::DataFrame.new(dists) def cluster_centroids(centroids, epsilon=1.8e-06, min_points=2) dbscan = DBSCAN( centroids.map{|c| c[1]}, :epsilon => epsilon, :min_points => min_points, :distance => :euclidean_distance ) return dbscan.results.select{|k,v| k != -1} # omit the non-cluster end centroid_clusters = cluster_centroids(centroids) def plot_clusters(clusters) plot = Nyaplot::Plot.new plot.width(300) plot.height(400) plot.zoom(true) plot.rotate_x_label(-60) pts = clusters.map{|c| c[1]}.flatten(1) # add some padding xmin = pts.map {|p| p[0]}.min - 1e-5 xmax = pts.map {|p| p[0]}.max + 1e-5 ymin = pts.map {|p| p[1]}.min - 1e-5 ymax = pts.map {|p| p[1]}.max + 1e-5 plot.xrange([xmin,xmax]) plot.yrange([ymin,ymax]) # now plot clusters.each do |cluster| if cluster[0] != -1 # ignore cluster -1 because not enough points cluster_x = cluster[1].map { |c| c[0] } cluster_y = cluster[1].map { |c| c[1] } names = cluster[1].map { |c| cluster[0] } df = Nyaplot::DataFrame.new({x:cluster_x,y:cluster_y,cluster:names}) sc = plot.add_with_df(df, :scatter, :x, :y) sc.tooltip_contents([:cluster]) color = "#"+ "%06x" % (rand * 0xffffff) sc.color(color) end end plot.show return plot end plot = plot_clusters(centroid_clusters) plot.show # given a list of centroids (lon,lat), find their poly's index in the centroid list (index => lon,lat) def get_polys_for_centroid_cluster(cluster, centroids, original_polys) polys = [] cluster.each do |cl| index = centroids.select {|k,v| v == cl}.keys.first polys.push(original_polys[index]) if index != -1 end return polys end cluster_polygons = get_polys_for_centroid_cluster(centroid_clusters[0], centroids, geocollection) def get_points(poly_feature) geom = poly_feature.geometry return false if (geom.geometry_type.type_name != "Polygon") pts = [] points = geom.exterior_ring.points points.each do |point| pts.push([point.x,point.y]) end return pts end def plot_polys(polys) plot = Nyaplot::Plot.new plot.width(500) plot.height(500) plot.zoom(true) plot.rotate_x_label(-60) polys.each do |poly| plot_poly(poly, plot) end plot.show end def plot_poly(poly, plot = nil) showplot = false if plot == nil showplot = true plot = Nyaplot::Plot.new plot.width(500) plot.height(500) plot.zoom(true) plot.rotate_x_label(-60) end points = get_points(poly) points_x = points.map { |p| p[0] } points_y = points.map { |p| p[1] } df = Nyaplot::DataFrame.new({x:points_x,y:points_y}) sc = plot.add_with_df(df, :scatter, :x, :y) color = "#"+ "%06x" % (rand * 0xffffff) sc.color(color) plot.show if showplot end plot_polys(cluster_polygons) def get_all_poly_points(polys) points = [] polys.each do |poly| points.push(get_points(poly)) end return points end cluster_poly_points = get_all_poly_points(cluster_polygons) def cluster_points(points, epsilon=6e-06, min_points=2) dbscan = DBSCAN( points.flatten(1), :epsilon => epsilon, :min_points => min_points, :distance => :euclidean_distance ) return dbscan.results.select{|k,v| k != -1} # omit the non-cluster end # exclude first item in each poly since it is same as last unique_points = cluster_poly_points.map{|poly| poly[1..-1]} vertex_clusters = cluster_points(unique_points) plot = plot_clusters(vertex_clusters) plot.show class Array def sum inject(0.0) { |result, el| result + el } end def mean sum / size end end def get_mean_poly(clusters) means = {} clusters.each do |cluster| next if cluster[0] == -1 # ignore cluster -1 lon = cluster[1].map {|c| c[0]}.mean lat = cluster[1].map {|c| c[1]}.mean means[cluster[0]] = [lon,lat] end return means end mean_poly = get_mean_poly(vertex_clusters) # plot clusters with overlaid (yellow) mean points plot = plot_clusters(vertex_clusters) # add means m_x = mean_poly.map { |m| m[1][0] } m_y = mean_poly.map { |m| m[1][1] } sc = plot.add(:scatter, m_x, m_y) color = "#ffff00" sc.color(color) sc.shape('diamond') plot.show def validate_clusters(clusters, unique_points) average = (unique_points.flatten.count.to_f / (unique_points.size * 2).to_f).round return clusters.select{|k,v| k!=-1}.size >= average end validate_clusters(vertex_clusters, unique_points) def find_connected_point(point, original_points) original_points.each do |poly| poly.each_with_index do |p,index| return poly[index+1] if point === p end end return end def find_cluster_for_point(point, clusters) clusters.each do |cluster| cluster[1].each do |p| return cluster[0] if point === p && cluster[0] != -1 end end return end def connect_clusters(clusters, original_points) connections = {} # for each cluster clusters.each do |cluster| # for each point in cluster if cluster[0] != -1 # exclude invalid cluster cluster_votes = {} # to weigh connection popularity (diff pts might be connected to diff clusters) cluster[1].each do |point| # find original point connected to it connection = find_connected_point(point, original_points) connected_cluster = find_cluster_for_point(connection, clusters) # if original point belongs to another cluster if connected_cluster != nil && connected_cluster != cluster[0] # vote for the cluster cluster_votes[connected_cluster] = 0 if cluster_votes[connected_cluster] == nil cluster_votes[connected_cluster] += 1 end end connections[cluster[0]] = cluster_votes.sort_by{|k, v| v} next if connections[cluster[0]].size == 0 connections[cluster[0]] = connections[cluster[0]].reverse[0][0] end end return connections end connections = connect_clusters(vertex_clusters, cluster_poly_points) def sort_connections(connections) # does some simple check for non-circularity sorted = [] seen = {} as_list = connections.select{|k,v| k} done = false first = as_list.first[0] from = first while !done do to = connections[from] done = true if seen[to] || to == nil || to.size == 0 seen[to] = true from = to sorted.push(to) done = true if seen.size == connections.size end return nil if seen.size != connections.size return sorted end # testing sort function sort_connections(connections) def connect_mean_poly(mean_poly, connections) connected = [] sorted = sort_connections(connections) return nil if sorted == nil sorted.each do |c| connected.push([mean_poly[c][0], mean_poly[c][1]]) end # for GeoJSON, last == first first = sorted[0] connected.push([mean_poly[first][0], mean_poly[first][1]]) return connected end final_polygon = connect_mean_poly(mean_poly, connections) plot = plot_clusters(vertex_clusters) m_x = final_polygon.map { |m| m[0] } m_y = final_polygon.map { |m| m[1] } sc = plot.add(:scatter, m_x, m_y) color = "#ffff00" sc.color(color) sc.shape('diamond') # add the MEAN POLYGON final_polygon.each_with_index do |c, i| next if i >= final_polygon.size-1 from = [ final_polygon[i][0], final_polygon[i+1][0] ] to = [ final_polygon[i][1], final_polygon[i+1][1] ] plot.add(:line, from, to) end plot.show def calculate_polygonfix_consensus(geojson) output = [] geom = parse_geojson(geojson) centroids = get_all_centroids(geom) centroid_clusters = cluster_centroids(centroids) centroid_clusters.each do |ccluster| next if ccluster[0] == -1 cluster = ccluster[1] # only the set of latlons sub_geom = get_polys_for_centroid_cluster(cluster, centroids, geom) next if sub_geom.size == 0 original_points = get_all_poly_points(sub_geom) next if original_points == nil unique_points = original_points.map{|poly| poly[1..-1]} vertex_clusters = cluster_points(unique_points) next if !validate_clusters(vertex_clusters, unique_points) mean_poly = get_mean_poly(vertex_clusters) next if mean_poly == {} connections = connect_clusters(vertex_clusters, original_points) next if connections == nil || connections == {} poly = connect_mean_poly(mean_poly, connections) next if poly == nil || poly.count == 0 output.push(poly) end return output end consensus = calculate_polygonfix_consensus(geomstr) geo_json = {:type => "FeatureCollection", :features => consensus.map { |f| {:type => "Feature", :properties => { :id => 1 }, :geometry => { :type => "Polygon", :coordinates =>[f] } } } }.to_json puts geo_json IRuby.html ''