Commit 69bd91f2 authored by Sonja Tripkovic's avatar Sonja Tripkovic

Add pull_from_geodatenviewer_list() and pull_from_geodatenviewer_meas()

parent 0bf49be1
import time
import warnings
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
......@@ -8,7 +7,13 @@ import overpy
from scipy.spatial.distance import cdist
from shapely.geometry import LineString, Point, Polygon, box
from shapely.ops import unary_union
import os
import shutil
import urllib.request
import zipfile
from typing import List
import pandas as pd
import tempfile
def make_overpy_request(request_body: str, retries: int):
for _ in range(retries):
......@@ -311,3 +316,88 @@ def delete_indoor_meas(
outdoor_meas_gdf = meas_gdf.drop(indoor_meas_gdf.index).reset_index()
return outdoor_meas_gdf.geometry
def pull_from_geodatenviewer_list(squares: List[str]) -> gpd.GeoDataFrame:
    """
    Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/,
    extracts and combines all .shp files to single geopandas geodataframe.

    Everything is downloaded and extracted into a temporary directory, which is removed again
    before the function returns -- also when a download, extraction or read fails.

    :param squares: identifiers of the squares to be downloaded (e.g. "103081"); every entry is
        converted with str(), so integer identifiers are accepted as well
    :return: geopandas geodataframe containing all data from .shp files
        (an empty geodataframe if no squares are given)
    """
    # example of single square with data: https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip
    url_template = "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip"

    # Normalise once so URL, archive name and shapefile name are all built from the same string.
    square_ids = [str(square) for square in squares]

    geo_dfs = []
    # The context manager guarantees the directory is deleted even if an exception is raised.
    with tempfile.TemporaryDirectory() as dir_path:
        # download and extract every archive, dropping the .zip as soon as it is unpacked
        for square_id in square_ids:
            zip_path = os.path.join(dir_path, "{}.zip".format(square_id))
            urllib.request.urlretrieve(url_template.format(square_id), zip_path)
            with zipfile.ZipFile(zip_path) as zip_ref:
                zip_ref.extractall(dir_path)
            os.remove(zip_path)

        # read all .shp files while the temporary directory still exists
        for square_id in square_ids:
            geo_dfs.append(gpd.read_file(os.path.join(dir_path, square_id + "_bkm.shp")))

    if not geo_dfs:
        # pd.concat() refuses an empty list; keep the "no squares -> empty frame" behaviour
        return gpd.GeoDataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return gpd.GeoDataFrame(pd.concat(geo_dfs, ignore_index=True))
def pull_from_geodatenviewer_meas(measurement_coords: gpd.GeoSeries) -> gpd.GeoDataFrame:
    """
    Downloads raster .zip file for Vienna Austria, extracts all raster polygon IDs containing measurements,
    then calls pull_from_geodatenviewer_list() to extract building polygons.

    The raster archive is unpacked into a temporary directory that is removed again once the
    raster has been read (or an error occurred).

    :param measurement_coords: geopandas geoseries containing measurements in EPSG:4326 projection
    :return: geopandas geodataframe containing all data from .shp files
    :raises ValueError: if measurement_coords or the downloaded raster is not in EPSG:4326
    :raises FileNotFoundError: if the downloaded raster archive contains no .shp file
    """
    if measurement_coords.crs != "EPSG:4326":
        raise ValueError("Make sure to pass data with EPSG:4326 projection")

    raster_gdf = None
    # The context manager removes the download directory even when a step below fails.
    with tempfile.TemporaryDirectory() as raster_path:
        zip_path = os.path.join(raster_path, "raster.zip")
        urllib.request.urlretrieve(
            "https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:MZKBLATT1000OGD&srsName=EPSG:4326&outputFormat=shape-zip",
            zip_path,
        )
        with zipfile.ZipFile(zip_path) as zip_ref:
            zip_ref.extractall(raster_path)
        os.remove(zip_path)  # delete zipped file

        # the shapefile name inside the archive is not fixed here, so pick it up by extension
        for entry in os.listdir(raster_path):
            if entry.endswith(".shp"):
                raster_gdf = gpd.read_file(os.path.join(raster_path, entry))

    if raster_gdf is None:
        raise FileNotFoundError("No .shp file found in the downloaded raster archive.")

    if raster_gdf.geometry.crs != 'EPSG:4326':
        raise ValueError('Raster EPSG has changed, see https://www.data.gv.at/katalog/dataset/b2d17060-b2f4-4cd7-a2e5-64beccfeb4c1 for mor information.')

    meas_gdf = gpd.GeoDataFrame(geometry=measurement_coords)
    # find all polygons from raster that contain measurements
    # NOTE(review): newer geopandas releases name this keyword `predicate` instead of `op`;
    # switch once the project's minimum geopandas version is confirmed.
    poly_gdf = gpd.sjoin(raster_gdf, meas_gdf, op="contains")

    # unique() yields a numpy array; pass plain strings because the list function builds
    # file names and URLs from the identifiers.
    polygon_ids = [str(polygon_id) for polygon_id in poly_gdf.MZK1000.unique()]
    return pull_from_geodatenviewer_list(polygon_ids)
......@@ -4,12 +4,11 @@ import shutil
import urllib.request
import zipfile
from typing import List
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm
import tempfile
def link_dataframes(
A: pd.DataFrame, B: pd.DataFrame, ref_col: str, metric=None, verbose=True
......@@ -67,52 +66,3 @@ def link_dataframes(
return combined, np.array(deviations)
def pull_from_geodatenviewer(dir_path: str, squares: List[int]) -> gpd.GeoDataFrame:
    """
    Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/,
    extracts and combines all .shp files to single geopandas geodataframe, then deletes the dir_path directory.

    The directory is created by this function and is removed again in every case, including
    when a download, extraction or read fails. A directory that already existed beforehand is
    never touched.

    :param dir_path: path to the directory where the files will be downloaded and extracted to;
        must not exist yet
    :param squares: list of integers denoting the squares to be downloaded
    :return: geopandas geodataframe containing all data from .shp files
        (an empty geodataframe if no squares are given)
    :raises FileExistsError: if dir_path already exists
    ToDo: extend this function to select squares automatically based on measurement coordinates
    """
    # creates new directory if it doesn't exist; done outside the cleanup block below so that
    # a pre-existing directory is never deleted by us
    try:
        os.makedirs(dir_path)
    except FileExistsError as err:
        raise FileExistsError(
            "The folder under this name already exists, choose another name for your directory."
        ) from err

    # example of single square with data: https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip
    url_template = "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip"

    try:
        # download and extract every archive, dropping the .zip as soon as it is unpacked
        for square_number in squares:
            zip_path = os.path.join(dir_path, "{}.zip".format(square_number))
            urllib.request.urlretrieve(url_template.format(square_number), zip_path)
            with zipfile.ZipFile(zip_path) as zip_ref:
                zip_ref.extractall(dir_path)
            os.remove(zip_path)

        # combine all .shp files
        geo_dfs = [
            gpd.read_file(os.path.join(dir_path, str(square) + "_bkm.shp"))
            for square in squares
        ]
    finally:
        # deletes the directory containing all the files, also on failure
        shutil.rmtree(dir_path)

    if not geo_dfs:
        # pd.concat() refuses an empty list; keep the "no squares -> empty frame" behaviour
        return gpd.GeoDataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return gpd.GeoDataFrame(pd.concat(geo_dfs, ignore_index=True))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment