Skip to content

Commit 4b1b976

Browse files
authored
Merge pull request #198 from mapswipe/dev
Generate better geojson output and centroids
2 parents fda100a + 9acfbc8 commit 4b1b976

File tree

1 file changed

+134
-4
lines changed

1 file changed

+134
-4
lines changed

mapswipe_workers/mapswipe_workers/generate_stats.py

Lines changed: 134 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from psycopg2 import sql
44
import dateutil
55
import dateutil.parser
6+
import json
67

78
from mapswipe_workers import auth
89
from mapswipe_workers.definitions import logger
@@ -46,6 +47,10 @@ def generate_stats(only_new_results):
4647
get_aggregated_results_by_project_id_geom(filename)
4748
csv_to_geojson(filename)
4849

50+
filename = f'{DATA_PATH}/api-data/agg_res_by_project_id_centroid.csv'
51+
get_aggregated_results_by_project_id_centroid(filename)
52+
csv_to_geojson(filename)
53+
4954
filename = f'{DATA_PATH}/api-data/agg_projects.csv'
5055
get_aggregated_projects(filename)
5156

@@ -62,6 +67,10 @@ def generate_stats(only_new_results):
6267
get_aggregated_progress_by_project_id_geom(filename)
6368
csv_to_geojson(filename)
6469

70+
filename = f'{DATA_PATH}/api-data/agg_progress_by_project_id_centroid.csv'
71+
get_aggregated_progress_by_project_id_centroid(filename)
72+
csv_to_geojson(filename)
73+
6574
logger.info('start to export csv file for %s projects based on given project_id_list' % len(project_id_list))
6675
for project_id in project_id_list:
6776
filename = f'{DATA_PATH}/api-data/agg_res_by_task_id/agg_res_by_task_id_{project_id}.csv'
@@ -255,12 +264,44 @@ def get_aggregated_results_by_project_id_geom(filename):
255264
sql_query = """COPY (
256265
SELECT
257266
r.*
267+
,p.name
268+
,p.project_details
258269
,ST_AsText(p.geom) as geom
259270
FROM
260271
aggregated_results_by_project_id as r , projects as p
261272
WHERE
262273
r.project_id = p.project_id
263-
) TO STDOUT WITH CSV HEADER"""
274+
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""
275+
276+
with open(filename, 'w') as f:
277+
pg_db.copy_expert(sql_query, f)
278+
279+
del pg_db
280+
281+
logger.info('saved aggregated results by project_id to %s' % filename)
282+
283+
284+
def get_aggregated_results_by_project_id_centroid(filename):
285+
'''
286+
Export results aggregated on project_id basis as csv file.
287+
288+
Parameters
289+
----------
290+
filename: str
291+
'''
292+
293+
pg_db = auth.postgresDB()
294+
sql_query = """COPY (
295+
SELECT
296+
r.*
297+
,p.name
298+
,p.project_details
299+
,ST_AsText(ST_Centroid(p.geom)) as geom
300+
FROM
301+
aggregated_results_by_project_id as r , projects as p
302+
WHERE
303+
r.project_id = p.project_id
304+
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""
264305

265306
with open(filename, 'w') as f:
266307
pg_db.copy_expert(sql_query, f)
@@ -382,20 +423,51 @@ def get_aggregated_progress_by_project_id_geom(filename):
382423
filename: str
383424
'''
384425

385-
# TODO: Export aggregated_progress_by_project_id_geom.csv as geojson
386-
387426
pg_db = auth.postgresDB()
388427
sql_query = """
389428
COPY (
390429
SELECT
391430
r.*
431+
,p.name
432+
,p.project_details
392433
,ST_AsText(p.geom) as geom
393434
FROM
394435
aggregated_progress_by_project_id as r,
395436
projects as p
396437
WHERE
397438
p.project_id = r.project_id
398-
) TO STDOUT WITH CSV HEADER"""
439+
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""
440+
441+
with open(filename, 'w') as f:
442+
pg_db.copy_expert(sql_query, f)
443+
444+
del pg_db
445+
logger.info('saved aggregated progress by project_id to %s' % filename)
446+
447+
448+
def get_aggregated_progress_by_project_id_centroid(filename):
449+
'''
450+
Export aggregated progress on a project_id basis as csv file.
451+
452+
Parameters
453+
----------
454+
filename: str
455+
'''
456+
457+
pg_db = auth.postgresDB()
458+
sql_query = """
459+
COPY (
460+
SELECT
461+
r.*
462+
,p.name
463+
,p.project_details
464+
,ST_AsText(ST_Centroid(p.geom)) as geom
465+
FROM
466+
aggregated_progress_by_project_id as r,
467+
projects as p
468+
WHERE
469+
p.project_id = r.project_id
470+
) TO STDOUT WITH (FORMAT CSV, HEADER, FORCE_QUOTE(project_id, name, project_details))"""
399471

400472
with open(filename, 'w') as f:
401473
pg_db.copy_expert(sql_query, f)
@@ -546,3 +618,61 @@ def csv_to_geojson(filename):
546618
f'SELECT *, CAST(geom as geometry) FROM "{filename_without_path}"'
547619
], check=True)
548620
logger.info(f'converted {filename} to {outfile}.')
621+
622+
cast_datatypes_for_geojson(outfile)
623+
624+
625+
def csv_to_geojson_centroids(filename):
    '''
    Convert a csv file into a GeoJSON file of centroids with ogr2ogr.

    The output path is the input path with '.csv' replaced by
    '_centroids.geojson'. Once ogr2ogr has finished, the result is passed
    on to cast_datatypes_for_geojson.

    Parameters
    ----------
    filename: str
    '''

    target = filename.replace('.csv', '_centroids.geojson')
    # the csv file name without directory and extension is used as the
    # table name in the sql statement handed to ogr2ogr
    layer_name = filename.split('/')[-1].replace('.csv', '')

    # ogr2ogr can't overwrite an existing file when choosing GeoJSON,
    # so a leftover output file has to be removed first
    if os.path.isfile(target):
        os.remove(target)

    # TODO: remove geom column from normal attributes in sql query
    centroid_query = f'SELECT *, ST_Centroid(CAST(geom as geometry)) FROM "{layer_name}"'
    ogr2ogr_call = [
        "ogr2ogr",
        "-f",
        "GeoJSON",
        target,
        filename,
        "-sql",
        centroid_query,
    ]
    # check=True raises CalledProcessError if ogr2ogr exits with an error
    subprocess.run(ogr2ogr_call, check=True)
    logger.info(f'converted {filename} to {target}.')

    cast_datatypes_for_geojson(target)
649+
650+
651+
def cast_datatypes_for_geojson(filename):
    '''
    Cast feature properties of a GeoJSON file to float where possible and
    remove the redundant geom property. The file is rewritten in place.

    The properties project_id, name, project_details, task_id and group_id
    are never cast. Values that cannot be parsed as float are kept as is.
    A file without any features is written back unchanged.

    Parameters
    ----------
    filename: str
        path of the GeoJSON file; a trailing '.csv' is mapped to '.geojson'
    '''

    # only swap the file extension: a plain str.replace('csv', 'geojson')
    # would also rewrite 'csv' inside directory or file names
    if filename.endswith('.csv'):
        filename = filename[:-len('.csv')] + '.geojson'

    # GeoJSON is UTF-8 encoded, don't depend on the locale default
    with open(filename, encoding='utf-8') as f:
        geojson_data = json.load(f)

    # identifier and free text columns must stay strings
    keep_as_is = {'project_id', 'name', 'project_details', 'task_id', 'group_id'}

    for feature in geojson_data['features']:
        feature_properties = feature['properties']
        if not feature_properties:
            # null or empty properties: nothing to cast for this feature
            continue

        # remove redundant geometry property, the geometry itself is
        # stored in the feature's geometry member
        feature_properties.pop('geom', None)

        # iterate over a snapshot since values are reassigned in the loop
        for key, value in list(feature_properties.items()):
            if key in keep_as_is:
                continue
            try:
                feature_properties[key] = float(value)
            except (TypeError, ValueError):
                # not a number (e.g. None or arbitrary text): keep original
                pass

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(geojson_data, f)
    logger.info(f'converted datatypes for {filename}.')

0 commit comments

Comments
 (0)