Commit 69bd91f2 authored by Sonja Tripkovic

pull_from_geodatenviewer_list() and pull_from_geodatenviewer_meas()

parent 0bf49be1
import time import time
import warnings import warnings
import geopandas as gpd import geopandas as gpd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
...@@ -8,7 +7,13 @@ import overpy ...@@ -8,7 +7,13 @@ import overpy
from scipy.spatial.distance import cdist from scipy.spatial.distance import cdist
from shapely.geometry import LineString, Point, Polygon, box from shapely.geometry import LineString, Point, Polygon, box
from shapely.ops import unary_union from shapely.ops import unary_union
import os
import shutil
import urllib.request
import zipfile
from typing import List
import pandas as pd
import tempfile
def make_overpy_request(request_body: str, retries: int): def make_overpy_request(request_body: str, retries: int):
for _ in range(retries): for _ in range(retries):
...@@ -311,3 +316,88 @@ def delete_indoor_meas( ...@@ -311,3 +316,88 @@ def delete_indoor_meas(
outdoor_meas_gdf = meas_gdf.drop(indoor_meas_gdf.index).reset_index() outdoor_meas_gdf = meas_gdf.drop(indoor_meas_gdf.index).reset_index()
return outdoor_meas_gdf.geometry return outdoor_meas_gdf.geometry
def pull_from_geodatenviewer_list(squares: List[str]) -> gpd.GeoDataFrame:
    """
    Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/,
    extracts them into a temporary directory and combines all .shp files into a single
    geopandas geodataframe. The temporary directory is removed afterwards, also when a
    download, extraction or read fails.

    :param squares: identifiers of the squares to be downloaded (e.g. "103081"); every
        entry is converted with str() before it is used in the URL and the file names
    :return: geopandas geodataframe containing all data from the .shp files
        (an empty geodataframe if squares is empty)
    """
    # Normalise once so that non-string identifiers (e.g. numpy values coming from
    # Series.unique()) work in the string concatenation below.
    square_ids = [str(square) for square in squares]
    if not square_ids:
        return gpd.GeoDataFrame()

    # example of single square with data:
    # https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip
    url_template = "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip"

    # The context manager guarantees the directory is deleted even if an error occurs.
    with tempfile.TemporaryDirectory() as dir_path:
        for square_id in square_ids:
            zip_path = os.path.join(dir_path, "{}.zip".format(square_id))
            urllib.request.urlretrieve(url_template.format(square_id), zip_path)
            with zipfile.ZipFile(zip_path) as zip_ref:
                zip_ref.extractall(dir_path)  # extract next to the archive
            os.remove(zip_path)  # the archive is no longer needed

        # Read everything while the files still exist on disk.
        frames = [
            gpd.read_file(os.path.join(dir_path, square_id + "_bkm.shp"))
            for square_id in square_ids
        ]

    # A single concat avoids re-copying the accumulated frame on every iteration.
    return gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))
def pull_from_geodatenviewer_meas(measurement_coords: gpd.GeoSeries) -> gpd.GeoDataFrame:
    """
    Downloads raster .zip file for Vienna Austria, extracts all raster polygon IDs containing measurements,
    then calls pull_from_geodatenviewer_list() to extract building polygons.

    The raster archive is handled inside a temporary directory that is removed again
    before the function returns, also when an error occurs.

    :param measurement_coords: geopandas geoseries containing measurements in EPSG:4326 projection
    :return: geopandas geodataframe containing all data from .shp files
    :raises ValueError: if measurement_coords or the downloaded raster is not in EPSG:4326
    :raises FileNotFoundError: if the downloaded raster archive contains no .shp file
    """
    if measurement_coords.crs != "EPSG:4326":
        raise ValueError("Make sure to pass data with EPSG:4326 projection")

    raster_url = "https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:MZKBLATT1000OGD&srsName=EPSG:4326&outputFormat=shape-zip"

    with tempfile.TemporaryDirectory() as raster_path:
        zip_path = os.path.join(raster_path, "raster.zip")
        urllib.request.urlretrieve(raster_url, zip_path)
        with zipfile.ZipFile(zip_path) as zip_ref:
            zip_ref.extractall(raster_path)  # extract file to dir
        os.remove(zip_path)  # delete zipped file

        shp_files = [name for name in os.listdir(raster_path) if name.endswith(".shp")]
        if not shp_files:
            # Fail with a clear message instead of an unbound-variable error further down.
            raise FileNotFoundError("No .shp file found in the downloaded raster archive")
        # If several shapefiles are present the last listed one is used.
        raster_gdf = gpd.read_file(os.path.join(raster_path, shp_files[-1]))

    if raster_gdf.geometry.crs != 'EPSG:4326':
        raise ValueError('Raster EPSG has changed, see https://www.data.gv.at/katalog/dataset/b2d17060-b2f4-4cd7-a2e5-64beccfeb4c1 for mor information.')

    meas_gdf = gpd.GeoDataFrame(geometry=measurement_coords)
    # find all polygons from raster that contain measurements
    # NOTE(review): newer geopandas releases name this keyword `predicate` and
    # deprecate `op` - confirm against the geopandas version pinned by the project.
    poly_gdf = gpd.sjoin(raster_gdf, meas_gdf, op='contains')

    # Pass plain strings on, since the identifiers are used to build URLs and file names.
    polygon_ids = [str(polygon_id) for polygon_id in poly_gdf.MZK1000.unique()]
    return pull_from_geodatenviewer_list(polygon_ids)
...@@ -4,12 +4,11 @@ import shutil ...@@ -4,12 +4,11 @@ import shutil
import urllib.request import urllib.request
import zipfile import zipfile
from typing import List from typing import List
import geopandas as gpd import geopandas as gpd
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
import tempfile
def link_dataframes( def link_dataframes(
A: pd.DataFrame, B: pd.DataFrame, ref_col: str, metric=None, verbose=True A: pd.DataFrame, B: pd.DataFrame, ref_col: str, metric=None, verbose=True
...@@ -67,52 +66,3 @@ def link_dataframes( ...@@ -67,52 +66,3 @@ def link_dataframes(
return combined, np.array(deviations) return combined, np.array(deviations)
def pull_from_geodatenviewer(dir_path: str, squares: List[int]) -> gpd.GeoDataFrame:
    """
    Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/,
    extracts and combines all .shp files to single geopandas geodataframe, then deletes the dir_path directory.
    The directory is deleted as well when a download, extraction or read fails, so a
    failed call does not block a retry with the same dir_path.

    :param dir_path: path to the directory where the files will be downloaded and extracted to;
        it must not exist yet
    :param squares: list of integers denoting the squares to be downloaded
    :return: geopandas geodataframe containing all data from .shp files
        (an empty geodataframe if squares is empty)
    :raises FileExistsError: if dir_path already exists
    ToDo: extend this function to select squares automatically based on measurement coordinates
    """
    # creates new directory; refuses to reuse an existing one because it is deleted at the end
    try:
        os.makedirs(dir_path)
    except FileExistsError as err:
        raise FileExistsError(
            "The folder under this name already exists, choose another name for your directory."
        ) from err

    # example of single square with data:
    # https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip
    url_template = "https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/{}_bkm.zip"

    try:
        for square_number in squares:
            zip_path = os.path.join(dir_path, "{}.zip".format(square_number))
            urllib.request.urlretrieve(url_template.format(square_number), zip_path)
            with zipfile.ZipFile(zip_path) as zip_ref:
                zip_ref.extractall(dir_path)  # extract file to dir
            os.remove(zip_path)  # delete zipped file

        # read all .shp files while they still exist on disk
        frames = [
            gpd.read_file(os.path.join(dir_path, str(square) + "_bkm.shp"))
            for square in squares
        ]
    finally:
        shutil.rmtree(dir_path)  # deletes the directory containing all the files

    if not frames:
        return gpd.GeoDataFrame()
    # A single concat avoids re-copying the accumulated frame on every iteration.
    return gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment