From 69bd91f2c7dca16cf0f6e538c08b521a26784d9c Mon Sep 17 00:00:00 2001 From: Sonja Tripkovic Date: Thu, 22 Apr 2021 10:15:48 +0200 Subject: [PATCH] pull_from_geodatenviewer_list() and pull_geodatenviewer_meas() --- measprocess/geospatial.py | 94 ++++++++++++++++++++++++++++++++++++++- measprocess/preprocess.py | 52 +--------------------- 2 files changed, 93 insertions(+), 53 deletions(-) diff --git a/measprocess/geospatial.py b/measprocess/geospatial.py index c0edede..dddf617 100644 --- a/measprocess/geospatial.py +++ b/measprocess/geospatial.py @@ -1,6 +1,5 @@ import time import warnings - import geopandas as gpd import matplotlib.pyplot as plt import numpy as np @@ -8,7 +7,13 @@ import overpy from scipy.spatial.distance import cdist from shapely.geometry import LineString, Point, Polygon, box from shapely.ops import unary_union - +import os +import shutil +import urllib.request +import zipfile +from typing import List +import pandas as pd +import tempfile def make_overpy_request(request_body: str, retries: int): for _ in range(retries): @@ -311,3 +316,88 @@ def delete_indoor_meas( outdoor_meas_gdf = meas_gdf.drop(indoor_meas_gdf.index).reset_index() return outdoor_meas_gdf.geometry + + +def pull_from_geodatenviewer_list(squares: List[str]) -> gpd.GeoDataFrame: + """ + Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/, + extracts and combines all .shp files to single geopandas geodataframe, then deletes the dir_path directory. + + :param dir_path: path to the directory where the files will be downloaded and extracted to + :param squares: list of integers denoting the squares to be downloaded + + :return: geopandas geodataframe containing all data from .shp files + """ + + dir_path = tempfile.mkdtemp() # create temporary directory + + # downloading .zip files + # example of single square with data: https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip + for square_number in squares: + urllib.request.urlretrieve( + "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip".format( + square_number + ), + os.path.join(dir_path, "{}.zip".format(square_number)), + ) + + # extracting and deleting .zip files + for item in os.listdir(dir_path): # loop through items in dir + if item.endswith(".zip"): + file_name = os.path.join(dir_path, item) + zip_ref = zipfile.ZipFile(file_name) # create zipfile object + zip_ref.extractall(dir_path) # extract file to dir + zip_ref.close() + os.remove(file_name) # delete zipped file + + # combine all .shp files + geo_df_all = pd.DataFrame() + for square in squares: + geo_df = gpd.read_file(os.path.join(dir_path, square + "_bkm.shp")) + geo_df_all = pd.concat([geo_df_all, geo_df], ignore_index=True) + + shutil.rmtree(dir_path) # deletes the directory containing all the files + return geo_df_all + + +def pull_from_geodatenviewer_meas(measurement_coords: gpd.GeoSeries) -> gpd.GeoDataFrame: + ''' + Downloads raster .zip file for Vienna Austria, extracts all raster polygon IDs containing measurements, + then calls pull_from_geodatenviewer_list() to extract building polygons. + + :param measurement_coords: geopandas geoseries containing measurements in EPSG:4326 projection + + :return: geopandas geodataframe containing all data from .shp files + ''' + + if measurement_coords.crs != "EPSG:4326": + raise ValueError("Make sure to pass data with EPSG:4326 projection") + + raster_path = tempfile.mkdtemp() + urllib.request.urlretrieve( + "https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:MZKBLATT1000OGD&srsName=EPSG:4326&outputFormat=shape-zip", + os.path.join(raster_path, "raster.zip"), + ) + + file_name = os.path.join(raster_path, "raster.zip") + zip_ref = zipfile.ZipFile(file_name) # create zipfile object + zip_ref.extractall(raster_path) # extract file to dir + zip_ref.close() + os.remove(file_name) # delete zipped file + + for file in os.listdir(raster_path): + if file.endswith(".shp"): + raster_gdf = gpd.read_file(os.path.join(raster_path, file)) + if raster_gdf.geometry.crs != 'EPSG:4326': + raise ValueError('Raster EPSG has changed, see https://www.data.gv.at/katalog/dataset/b2d17060-b2f4-4cd7-a2e5-64beccfeb4c1 for mor information.') + + meas_gdf = gpd.GeoDataFrame(geometry=measurement_coords) + + poly_gdf = gpd.sjoin(raster_gdf, meas_gdf, op = 'contains') # find all polygons from raster that contain measurements + + polygonID_list = poly_gdf.MZK1000.unique() + return_gdf = pull_from_geodatenviewer_list(polygonID_list) + + return return_gdf + + diff --git a/measprocess/preprocess.py b/measprocess/preprocess.py index 86067be..1b67795 100644 --- a/measprocess/preprocess.py +++ b/measprocess/preprocess.py @@ -4,12 +4,11 @@ import shutil import urllib.request import zipfile from typing import List - import geopandas as gpd import numpy as np import pandas as pd from tqdm import tqdm - +import tempfile def link_dataframes( A: pd.DataFrame, B: pd.DataFrame, ref_col: str, metric=None, verbose=True @@ -67,52 +66,3 @@ def link_dataframes( return combined, np.array(deviations) -def pull_from_geodatenviewer(dir_path: str, squares: List[int]) -> gpd.GeoDataFrame: - """ - Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/, - extracts and combines all .shp files to single geopandas geodataframe, then deletes the dir_path directory. - - :param dir_path: path to the directory where the files will be downloaded and extracted to - :param squares: list of integers denoting the squares to be downloaded - - :return: geopandas geodataframe containing all data from .shp files - - ToDo: extend this function to select squares automatically based on measurement coordinates - - """ - - # creates new directory if it doesn't exist - try: - os.makedirs(dir_path) - except FileExistsError: - raise Exception( - "The folder under this name already exists, choose another name for your directory." - ) - - # downloading .zip files - # example of single square with data: https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip - for square_number in squares: - urllib.request.urlretrieve( - "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip".format( - square_number - ), - os.path.join(dir_path, "{}.zip".format(square_number)), - ) - - # extracting and deleting .zip files - for item in os.listdir(dir_path): # loop through items in dir - if item.endswith(".zip"): - file_name = os.path.join(dir_path, item) - zip_ref = zipfile.ZipFile(file_name) # create zipfile object - zip_ref.extractall(dir_path) # extract file to dir - zip_ref.close() - os.remove(file_name) # delete zipped file - - # combine all .shp files - geo_df_all = pd.DataFrame() - for square in squares: - geo_df = gpd.read_file(os.path.join(dir_path, str(square) + "_bkm.shp")) - geo_df_all = pd.concat([geo_df_all, geo_df], ignore_index=True) - - shutil.rmtree(dir_path) # deletes the directory containing all the files - return geo_df_all -- 2.22.0