# measprocess/preprocess.py -- link_measurement_bs
#
# Reconstructed from the (newline-collapsed) git patch
#   3af6294252744d7b7be8bebb0a1257c97562017a, Lukas Eller, Fri, 23 Apr 2021
#   "included first variant of link_measurement_bs --> currently commented out"
# The patch replaces `import tempfile` with the shapely / pickle / warnings
# imports below and appends link_measurement_bs after link_dataframes.
# Only fragments of link_dataframes(A, B, ref_col, metric=None, verbose=True)
# are visible in the patch context, so that function is not reproduced here.

import pickle  # added by the patch; no use of it is visible in the patch itself
import warnings

import geopandas as gpd
import numpy as np  # used by link_dataframes (np.array(deviations)), not shown
import pandas as pd
from shapely.geometry import Point
from tqdm import tqdm


def _projected_points(longitudes, latitudes, target_crs, verbose, desc):
    """Build a GeoSeries of EPSG:4326 lon/lat points projected to *target_crs*."""
    points = [
        Point(lon, lat)
        for lon, lat in tqdm(
            zip(longitudes, latitudes),
            total=len(longitudes),
            disable=(not verbose),
            leave=False,
            desc=desc,
        )
    ]
    return gpd.GeoSeries(points).set_crs("EPSG:4326").to_crs(target_crs)


def _candidate_labels_by_band_and_pci(bs_data, band_mapping):
    """Map each (band, PCI) pair to the row labels of the matching BS sectors."""
    lookup = {}
    for band, technologies in band_mapping.items():
        in_band = bs_data[bs_data["technology"].isin(technologies)]
        for pci, labels in in_band.groupby("physicalcellid").groups.items():
            lookup[(band, pci)] = labels
    return lookup


def link_measurement_bs(
    combined,
    bs_data,
    used_epsg="EPSG:31287",
    distance_treshold=1000,
    band_mapping=None,
    verbose=True,
):
    """Attach to every measurement the closest base-station sector with its PCI.

    For each measurement the base-station sectors with the same physical cell
    id and a technology allowed for the measurement's band are considered; the
    geographically closest one is linked, provided it is not further away than
    ``distance_treshold``.

    Parameters
    ----------
    combined : pd.DataFrame
        Measurements with two-level columns. Must provide ``('A', 'Band')``,
        ``('A', 'PCI')``, ``('B', 'Lon.')`` and ``('B', 'Lat.')``.
    bs_data : pd.DataFrame
        Base-station sectors with the columns ``technology``,
        ``physicalcellid``, ``longitude`` and ``latitude``.
    used_epsg : str
        CRS both point sets are projected to before distances are computed.
        Coordinates of both inputs are interpreted as EPSG:4326.
    distance_treshold : float
        Largest accepted measurement-to-sector distance, in the units of
        ``used_epsg`` (presumably metres for the default -- TODO confirm).
        The misspelled name is kept for backward compatibility.
    band_mapping : dict, optional
        Maps a band value to the list of ``technology`` labels valid for it.
        Defaults to a built-in mapping for bands 70003 and 70020.
    verbose : bool
        Show tqdm progress bars when True.

    Returns
    -------
    pd.DataFrame
        The kept measurements (column groups ``'A'`` and ``'B'``) side by side
        with their linked sector rows (column group ``'C'``), re-indexed from 0.

    Warns
    -----
    UserWarning
        For every band missing from ``band_mapping`` and, once, when
        measurements were dropped because no sector could be linked.
    """
    if band_mapping is None:
        band_mapping = {
            70003: ['LTE1800', 'LTE1805 - 10 MHz'],
            70020: ['LTE800- 20 MHz', 'LTE801- 10 MHz'],
        }

    # Bands without a mapping cannot be linked; tell the caller before the
    # corresponding measurements are filtered out below.
    for band in combined["A"]["Band"].unique():
        if band not in band_mapping:
            warnings.warn(
                f"Band {band} not in Mapping --- Measurements discarded",
                stacklevel=2,
            )

    known_technologies = [
        technology
        for technologies in band_mapping.values()
        for technology in technologies
    ]

    # After reset_index the row labels equal the row positions, which keeps
    # the frames aligned with the GeoSeries built from them.
    bs_data = bs_data[
        bs_data["technology"].isin(known_technologies)
    ].reset_index(drop=True)
    combined = combined[
        combined["A"]["Band"].isin(list(band_mapping))
    ].reset_index(drop=True)

    bs_points = _projected_points(
        bs_data["longitude"],
        bs_data["latitude"],
        used_epsg,
        verbose,
        "Build Basestation GeoSeries [1/3]",
    )
    measurement_points = _projected_points(
        combined["B"]["Lon."],
        combined["B"]["Lat."],
        used_epsg,
        verbose,
        "Build Measurement GeoSeries [2/3]",
    )

    # Index the sectors once instead of masking bs_data for every measurement.
    candidates = _candidate_labels_by_band_and_pci(bs_data, band_mapping)

    dropped_measurements = 0
    selected_bs_labels, selected_meas_positions = [], []
    band_pci_pairs = tqdm(
        zip(combined["A"]["Band"], combined["A"]["PCI"]),
        total=len(combined),
        disable=(not verbose),
        desc="Select BS [3/3]",
    )
    for position, (band, pci) in enumerate(band_pci_pairs):
        labels = candidates.get((band, pci))
        if labels is None or len(labels) == 0:
            dropped_measurements += 1
            continue

        distances = bs_points.loc[labels].distance(
            measurement_points.iloc[position]
        )

        # Written as "not <=" so that an all-NaN distance set (missing
        # coordinates) is dropped instead of being treated as a match.
        if not distances.min() <= distance_treshold:
            dropped_measurements += 1
            continue

        selected_bs_labels.append(distances.idxmin())
        selected_meas_positions.append(position)

    # Both lists grow together, so the two frames always have equal length.
    linked_bs = bs_data.loc[selected_bs_labels].reset_index(drop=True)
    linked_measurements = combined.iloc[selected_meas_positions].reset_index(
        drop=True
    )

    if dropped_measurements > 0:
        warnings.warn(
            f"{dropped_measurements} measurements were dropped because no BS was found",
            stacklevel=2,
        )

    return pd.concat(
        [linked_measurements["A"], linked_measurements["B"], linked_bs],
        keys=['A', 'B', 'C'],
        axis=1,
    )