fetch.py 6.99 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
import time
import warnings
import geopandas as gpd
import overpy
from shapely.geometry import LineString, Polygon
import os
import shutil
import urllib.request
import zipfile
from typing import List
import pandas as pd
import tempfile


def make_overpy_request(request_body: str, retries: int):
    for _ in range(retries):
        try:
            api = overpy.Overpass()
            result = api.query(request_body)
        except (
            overpy.exception.OverpassTooManyRequests,
            overpy.exception.OverpassGatewayTimeout,
        ):
            warnings.warn("Overpass Too Many Requests --- Retry in 5 seconds")
            time.sleep(5)
        else:
            return result


def geoseries_streets(
    measurement_coords: gpd.GeoSeries, retries: int = 5, crop: bool = False
) -> gpd.GeoSeries:
    """
    Obtain the street shapes in the area spanned by the measurement_coords.

    :param measurement_coords: A geopandas geoseries of Points(lon, lat) representing the measurement positions in a EPSG:4326 projection
    :param retries: Number of retries for overpy requests, default:5
    :param crop: Set to True if the returned geoseries should be fixed to bounding box determined by measurement_coords, default: False

    :return: A geopandas geoseries consiting of LineStrings representing the streets in EPSG:4326
    """

    if measurement_coords.crs != "EPSG:4326":
        raise ValueError("Make sure to pass data with EPSG:4326 projection")

    # Find total bounds: (lat_min, long_min, lat_max, long_max)
    bounds = tuple(measurement_coords.total_bounds[[1, 0, 3, 2]])

    result = make_overpy_request(
        f"""
        [out:json][timeout:25];
        (
        way['highway']['highway'!='footway']['highway'!='platform']['highway'!='steps']{bounds};
        );
        out body;
        >;
        out skel qt;
        """,
        retries,
    )

    street_series = gpd.GeoSeries(
        LineString(((node.lon, node.lat) for node in way.nodes if len(way.nodes) > 1))
        for way in result.ways
    ).set_crs("EPSG:4326")

    if crop == True:
        street_series = crop_geoseries_by_bounds(street_series, measurement_coords)

    street_series = (
        street_series.explode()
    )  # if any MultiLineStrings, they are transformed to LineStrings
    return street_series


def geoseries_buildings(
    measurement_coords: gpd.GeoSeries, retries: int = 5
) -> gpd.GeoSeries:
    """
    Obtain the blockage shapes in the area spanned by the measurement_coords.

    :param measurement_coords: A geopandas geoseries of Points(lon, lat) representing the measurement positions in a EPSG:4326 projection

    :return: A geopandas geoseries consiting of Polygons representing the blockages in EPSG:4326
    """

    if measurement_coords.crs != "EPSG:4326":
        raise ValueError("Make sure to pass data with EPSG:4326 projection")

    # Find total bounds: (lat_min, long_min, lat_max, long_max)
    bounds = tuple(measurement_coords.total_bounds[[1, 0, 3, 2]])

    result = make_overpy_request(
        f"""
        [out:json][timeout:25];
        (
        way['building']{bounds};
        relation["building"]{bounds};
        );
        out body;
        >;
        out skel qt;
        """,
        retries,
    )

    blockages = gpd.GeoSeries(
        Polygon(
            (
                (node.lon, node.lat) for node in way.nodes if len(way.nodes) > 2
            )  # Polygon needs at least 3 nodes
        )
        for way in result.ways
    ).set_crs("EPSG:4326")

    return blockages

def pull_from_geodatenviewer_list(squares: List[str]) -> gpd.GeoDataFrame:
    """
    Downloads .zip files for specified squares from https://www.wien.gv.at/ma41datenviewer/public/,
    extracts and combines all .shp files to single geopandas geodataframe, then deletes the dir_path directory.

    :param dir_path: path to the directory where the files will be downloaded and extracted to
    :param squares: list of integers denoting the squares to be downloaded

    :return: geopandas geodataframe containing all data from .shp files
    """

    dir_path = tempfile.mkdtemp()  # create temporary directory

    # downloading .zip files
    # example of single square with data: https://www.wien.gv.at/ma41datenviewer/downloads/ma41/geodaten/fmzk_bkm/103081_bkm.zip
    for square_number in squares:
134 135 136 137 138 139 140 141
        try:
            urllib.request.urlretrieve(
                "https://www.wien.gv.at/ma41datenviewer/downloads/geodaten/fmzk_bkm/{}_bkm.zip".format(
                    square_number
                ),
                os.path.join(dir_path, "{}.zip".format(square_number)),
            )
        except: continue
142 143 144 145 146 147 148 149 150 151 152 153 154

    # extracting and deleting .zip files
    for item in os.listdir(dir_path):  # loop through items in dir
        if item.endswith(".zip"):
            file_name = os.path.join(dir_path, item)
            zip_ref = zipfile.ZipFile(file_name)  # create zipfile object
            zip_ref.extractall(dir_path)  # extract file to dir
            zip_ref.close()
            os.remove(file_name)  # delete zipped file

    # combine all .shp files
    geo_df_all = pd.DataFrame()
    for square in squares:
155 156 157 158
        try:
            geo_df = gpd.read_file(os.path.join(dir_path, square + "_bkm.shp"))
            geo_df_all = pd.concat([geo_df_all, geo_df], ignore_index=True)
        except: continue
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193

    shutil.rmtree(dir_path)  # deletes the directory containing all the files
    return geo_df_all


def pull_from_geodatenviewer_meas(
    measurement_coords: gpd.GeoSeries,
) -> gpd.GeoDataFrame:
    """
    Downloads raster .zip file for Vienna Austria, extracts all raster polygon IDs containing measurements,
    then calls pull_from_geodatenviewer_list() to extract building polygons.

    :param measurement_coords: geopandas geoseries containing measurements in EPSG:4326 projection

    :return: geopandas geodataframe containing all data from .shp files
    """

    if measurement_coords.crs != "EPSG:4326":
        raise ValueError("Make sure to pass data with EPSG:4326 projection")

    raster_path = tempfile.mkdtemp()
    urllib.request.urlretrieve(
        "https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:MZKBLATT1000OGD&srsName=EPSG:4326&outputFormat=shape-zip",
        os.path.join(raster_path, "raster.zip"),
    )

    file_name = os.path.join(raster_path, "raster.zip")
    zip_ref = zipfile.ZipFile(file_name)  # create zipfile object
    zip_ref.extractall(raster_path)  # extract file to dir
    zip_ref.close()
    os.remove(file_name)  # delete zipped file

    for file in os.listdir(raster_path):
        if file.endswith(".shp"):
            raster_gdf = gpd.read_file(os.path.join(raster_path, file))
194 195
            
    raster_gdf.to_crs(4326,inplace=True)
196 197 198 199 200 201 202 203 204 205 206

    meas_gdf = gpd.GeoDataFrame(geometry=measurement_coords)

    poly_gdf = gpd.sjoin(
        raster_gdf, meas_gdf, op="contains"
    )  # find all polygons from raster that contain measurements

    polygonID_list = poly_gdf.MZK1000.unique()
    return_gdf = pull_from_geodatenviewer_list(polygonID_list)

    return return_gdf