process.py 12.1 KB
Newer Older
1
import geopandas as gpd
2 3
import matplotlib.pyplot as plt
from shapely.geometry import LineString, Point, Polygon, box
4
from shapely.ops import unary_union
5
from tqdm import tqdm
6
import pandas as pd
7
from typing import Dict, Union, Callable
 Lukas Eller's avatar
Lukas Eller committed
8
import numpy as np
9 10 11 12 13 14 15 16

def project_onto_streets(
    point_series: gpd.GeoSeries,
    street_series: gpd.GeoSeries,
    epsg: str = "EPSG:31287",
    plot: bool = False,
) -> (gpd.GeoSeries, gpd.GeoSeries):
    """
17 18 19 20 21 22 23 24 25 26
    Projects each point from the point_series onto the closest element of
    street_series according to a normal projection. For that, both
    series will be transformed into a 2D projection (default EPSG:31287).

    :param point_series: GeoSeries of all points to project onto streets.
    :param street_series: GeoSeries of linestrings representing the streets.
    :param epsg: EPSG projection to use during the projection operation.
    :param plot: Boolean to indiceate whether the result of the operation shall be plotted.

    :return: A tuple of two GeoSeries containing the result of the operation as well as the deviation introduced.
27
    """
28

29
    if epsg == "EPSG:4326":
30
        raise ValueError(
31
            "Projection cannot be conducted using geographical coordinates such as EPSG:4326. Select 2D projection instead."
32
        )
33

34 35 36 37 38 39 40 41 42
    if point_series.crs is None or street_series.crs is None:
        raise ValueError(
            "CRS needs to be set explicitly for passed Geoseries: Use .set_crs()."
        )

    point_original_crs, street_original_crs = point_series.crs, street_series.crs
    point_series = point_series.to_crs(epsg)
    street_series = street_series.to_crs(epsg)

43
    projected = []
44
    for point in tqdm(point_series):
45 46
        street_ind = street_series.distance(point).argmin()
        projected.append(
47 48
            street_series.loc[street_ind].interpolate(
                street_series.loc[street_ind].project(point)
49
            ).iloc[0]
50 51
        )

52
    projected = gpd.GeoSeries(projected, index=point_series.index)
53 54 55 56 57 58 59 60 61
    projected = projected.set_crs(epsg)
    deviation_projection = projected.distance(point_series)

    if plot:
        plt.figure()
        plt.title("Deviation Histogram")
        deviation_projection.plot.hist(bins=50)
        plt.show()

62
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
63 64 65
        ax1.set_title("Raw Measurements")
        ax2.set_title("After Projection")
        ax1.scatter(point_series.x, point_series.y, label="Point Series")
66 67 68
        clb = ax2.scatter(
            projected.x, projected.y, c=deviation_projection, label="Projected Series"
        )
69 70 71 72
        for street in street_series:
            ax1.plot(*street.xy, color="black")
            ax2.plot(*street.xy, color="black")
        bar = fig.colorbar(clb)
73
        bar.ax.set_title("Deviation [Meters]")
74 75 76
        ax1.legend()
        ax2.legend()
        plt.show()
Lukas Eller's avatar
Lukas Eller committed
77

78
    # Reset Street and Point Series CRS
79 80 81
    street_series = street_series.to_crs(street_original_crs)
    point_series = point_series.to_crs(point_original_crs)
    projected = projected.to_crs(point_original_crs)
82

83
    return projected, deviation_projection
84

85 86 87 88

def crop_geoseries_by_bounds(
    original_geoseries: gpd.GeoSeries, filter_geoseries: gpd.GeoSeries
) -> gpd.GeoSeries:
89
    """
90
    Returns cut off geoseries from 'original_geoseries' which lies within the bounds on the filter_geoseries
91

92
    :param original_geoseries: A geopandas geoseries of points/streets/polygons to be filtered, represented in a EPSG:4326 projection
93 94 95 96
    :param filter_geoseries: A geopandas geoseries based on which a bounding box is for filter is made

    :return: geopandas geoseries of same format as original_geoseries
    """
97
    if original_geoseries.crs != "EPSG:4326" or filter_geoseries.crs != "EPSG:4326":
98 99 100
        raise ValueError("Make sure to pass data with EPSG:4326 projection")

    filter_bounds = filter_geoseries.total_bounds
101 102 103 104 105 106 107 108 109 110 111
    bound_box = box(
        filter_bounds[0], filter_bounds[1], filter_bounds[2], filter_bounds[3]
    )
    bound_box = gpd.GeoDataFrame(geometry=gpd.GeoSeries(bound_box)).set_crs(
        "EPSG:4326"
    )  # create geodataframe for overlay
    filtered_series = (
        gpd.overlay(
            gpd.GeoDataFrame(geometry=original_geoseries), bound_box, how="intersection"
        )
    ).geometry
112

113 114
    return filtered_series

115 116 117 118 119

def interpolate_series_at_distance(
    street_series: gpd.GeoSeries, distance: int = 2
) -> gpd.GeoSeries:
    """
120 121 122
    Interpolates along the geoseries streets_series to get nodes at a distance 'distance' apart.
    Make sure to pass data in Cartesian coordinates (e.g. use street_series.to_crs('EPSG:31287') for Austria).

123
    :param street_series: geopandas geoseries
124 125 126
    :param distance: integer determining the spacing between interpolation points, default=2

    :return: geopandas geoseries overlapping with original geoseries containing more frequent nodes
127
    """
128
    if street_series.crs == "EPSG:4326":
129 130 131 132 133
        raise ValueError(
            "Make sure to pass data in Cartesian coordinates (e.g. use street_series.to_crs('EPSG:31287') for Austria)"
        )

    if distance <= 0:
134 135 136 137
        raise ValueError("Distance has to be a positive number.")

    lines = []
    for linestring in street_series:
138 139 140 141 142 143 144 145 146 147 148 149 150 151
        list_points = []  # list to hold all the point coords
        current_dist = distance  # set the current distance to place the point
        line_length = (
            linestring.length
        )  # get the total length of the line (not number of points, but actual length)
        list_points.append(
            Point(list(linestring.coords[0]))
        )  # append the starting coordinate to the list
        while (
            current_dist < line_length
        ):  # while the current cumulative distance is less than the total length of the line
            list_points.append(
                linestring.interpolate(current_dist)
            )  # use interpolate and increase the current distance
152
            current_dist += distance
153 154 155
        list_points.append(
            Point(list(linestring.coords[-1]))
        )  # append end coordinate to the list
156 157 158 159
        line = LineString(list_points)
        lines.append(line)

    interpolated_street_series = gpd.GeoSeries(lines)
160
    return interpolated_street_series
161

162 163 164

def dissolve_touching_polygons(polygon_series: gpd.GeoSeries) -> gpd.GeoSeries:
    """
165 166 167 168 169
    Returns geoseries of dissolved touching polygons with closed polygon holes.

    :param polygon_series: geopandas geoseries of polygons

    :return: geopandas geoseries of dissolved polygons
170 171 172 173 174 175 176 177 178
    """
    poly_list = [
        polygon_series[i]
        for i in range(polygon_series.size)
        if polygon_series[i] != None
    ]
    poly_union = unary_union(
        poly_list
    )  # MultiPolygon containing polygon object sequence
179
    p_list = []
180 181 182 183 184
    for geom in poly_union.geoms:  # loop over polygon object sequence
        p = Polygon(
            geom.exterior
        )  # create new polygon only using the exterior (closes holes)
        p_list.append(p)
185 186 187 188
    polygon_series_disolved = gpd.GeoSeries(p_list)

    return polygon_series_disolved

189 190 191 192 193

def delete_indoor_meas(
    measurement_coords: gpd.GeoSeries, polygons: gpd.GeoSeries
) -> gpd.GeoSeries:
    """
194 195 196 197 198
    Removes indoor measurements by deleting all measurement points inside the polygons.

    :param measurement_coords: A geopandas geoseries of Points(lon, lat) representing the measurement positions in a EPSG:4326 projection
    :param polygons: A geopandas geoseries of Polygons in EPSG:4326 projection

199
    :return: A geopandas geoseries of Points(lon, lat) representing the outdoor measurement positions in a EPSG:4326 projection
200

201 202 203
    """
    meas_gdf = gpd.GeoDataFrame(geometry=measurement_coords)
    polygons_gdf = gpd.GeoDataFrame(geometry=polygons)
204

205
    indoor_meas_gdf = gpd.sjoin(meas_gdf, polygons_gdf)
206 207
    outdoor_meas_gdf = meas_gdf.drop(indoor_meas_gdf.index).reset_index()

208
    return outdoor_meas_gdf.geometry
209

210

211 212 213 214
def assign_landuse_classes(
    point_geodataframe: gpd.GeoDataFrame,
    polygon_geodataframe: gpd.GeoDataFrame,
    cut_polygons_by_buffer: bool = False,
 Lukas Eller's avatar
Lukas Eller committed
215
    buffer_size: int = 0,
216
    category_function : Union[None, Callable] = lambda land_use_code: "Urban" if land_use_code < 200 else "Rural",
217
    epsg: str = "EPSG:31287",
218 219
    n_categories: int = 5
) -> gpd.GeoDataFrame:
220 221 222 223 224 225 226 227 228 229 230
    '''
    Returns a DataFrame with the landuse class distribution in the area defined by the buffer_size around the passed locations
    Landuseclasses are defined according to the Corine Land Cover Project:
    https://land.copernicus.eu/user-corner/technical-library/corine-land-cover-nomenclature-guidelines/html

    For Austria the land_use_classes can be obtained via:
    https://www.data.gv.at/katalog/dataset/76617316-b9e6-4bcd-ba09-e328b578fed2

    For Germany the source is:
    https://gis.uba.de/catalog/OpenSearch.do?type=/Query/OpenSearch.do&search=CORINE

231 232
    Current versions of the respective land_use_classes source files are provided in the gitlab project under /data

233 234 235 236
    :param point_geodataframe: A Geodataframe with geometry of type point for which the landuse classes will be evaluated.
    :param polygon_geodataframe: A GeoDataFrame with the landuse class polygons. Can be retrieved from
    :param cut_polygons_by_buffer: If True the polygon_geodataframe will be cut by the total bounds of the point series. Can result in a speedup.
    :param buffer_size: The buffer_size in meters used to construct the area around points for which the distribution of classes will be computed.
237
    :param category_function: If not None a additional dataframe is returned which aggregates land_use classes to the defined categories
238
    :param epsg: The projection used for the computation of the area. Defaults to EPSG:31287 for Austria.
239
    :param n_categories: Maximum number of top categories sorted by the respective percentage values to be considered.
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281

    :return: A GeoDataFrame with columns describing the percentage of the area assigned to the respective land use class.
    '''

    if point_geodataframe.crs is None or polygon_geodataframe.crs is None:
        raise ValueError(
            "CRS needs to be set explicitly for passed Geoseries: Use .set_crs()."
        )

    point_geodataframe['first5codes'] = None
    point_geodataframe['first5percent'] = None

    if buffer_size < 0:
        raise ValueError(
            "Buffer size has to be a positive integer. Default value is zero."
        )
    if buffer_size != 0:
        point_geodataframe.geometry = point_geodataframe.geometry.buffer(buffer_size)

    if cut_polygons_by_buffer == True:
        xmin, ymin, xmax, ymax = point_geodataframe.to_crs(epsg).total_bounds
        polygon_geodataframe = polygon_geodataframe.cx[xmin-buffer_size:xmax+buffer_size, ymin-buffer_size:ymax+buffer_size]

    point_geodataframe = point_geodataframe.to_crs(epsg)
    polygon_geodataframe = polygon_geodataframe.to_crs(epsg)

    total = len(point_geodataframe)
    results = {}

    for i, element in tqdm(point_geodataframe.iterrows(), total=total):
        point_buffer = element.geometry

        percentages = []
        codes = []
        for j, poly in enumerate(polygon_geodataframe.geometry):
            if point_buffer.intersects(poly):
                percentages.append(point_buffer.intersection(poly).area/point_buffer.area*100)
                codes.append(polygon_geodataframe.iloc[j]['CODE_18'])
        code_percent_dict = {}
        for key, value in zip(codes, percentages):
            code_percent_dict[key] = code_percent_dict.get(key, 0) + value
        code_percent_dict = dict(sorted(code_percent_dict.items(), key=lambda x: x[1], reverse =True))
282 283 284 285 286 287 288 289 290 291 292 293 294 295
        firstnpairs = {int(k): dict(code_percent_dict)[k] for k in list(dict(code_percent_dict))[:n_categories]}

        results[i] = firstnpairs

    if category_function is not None:
        processed = {}
        for key, result in results.items():
            processed[key] = {}
            for land_use, area_percentage in result.items():
                category = category_function(land_use)
                processed[key][category] = processed[key].get(category, 0) + area_percentage

        processed_result = pd.DataFrame(processed).transpose()
        processed_result.replace(np.nan, 0, inplace=True)
296

297
        return results, processed_result
298

299 300
    else:
        return results