def pull_from_geodatenviewer(dir_path: str, squares: List[int]) -> "gpd.GeoDataFrame":
    '''
    Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/,
    extracts and combines all .shp files to single geopandas geodataframe, then deletes the dir_path directory.

    The dir_path directory is created by this function and is always removed again before
    returning, also when a download, extraction or read step fails.

    :param dir_path: path to the directory where the files will be downloaded and extracted to;
                     it must not exist yet
    :param squares: list of integers denoting the squares to be downloaded

    :return: geopandas geodataframe containing all data from .shp files
             (an empty geodataframe if squares is empty)

    :raises FileExistsError: if dir_path already exists (the existing directory is left untouched)

    ToDo: extend this function to select squares automatically based on measurement coordinates

    '''

    # Create the scratch directory *before* entering the try/finally below, so that a
    # pre-existing directory owned by the caller is never deleted by the cleanup step.
    try:
        os.makedirs(dir_path)
    except FileExistsError as err:
        raise FileExistsError(
            'The folder under this name already exists, choose another name for your directory.'
        ) from err

    # example of single square with data:
    # https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip
    url_template = "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip"

    try:
        frames = []
        for square_number in squares:
            zip_path = os.path.join(dir_path, "{}.zip".format(square_number))

            # download the archive of this square
            urllib.request.urlretrieve(url_template.format(square_number), zip_path)

            # extract it next to the archive; the context manager closes the file
            # handle even if extraction fails
            with zipfile.ZipFile(zip_path) as zip_ref:
                zip_ref.extractall(dir_path)
            os.remove(zip_path)  # the archive is no longer needed once extracted

            # the archive is expected to contain <square>_bkm.shp
            shp_path = os.path.join(dir_path, "{}_bkm.shp".format(square_number))
            frames.append(gpd.read_file(shp_path))

        if not frames:
            # nothing requested: still honour the declared return type
            return gpd.GeoDataFrame()

        # a single concat over GeoDataFrames keeps the GeoDataFrame type and avoids
        # re-copying the accumulated data on every iteration
        return pd.concat(frames, ignore_index=True)
    finally:
        # deletes the directory containing all the files, on success and on failure
        shutil.rmtree(dir_path, ignore_errors=True)