# Reconstructed from the whitespace-collapsed patch "added interpolate_signal_loc()
# in rtr process" (measprocess/rtr/process.py). The function relies on the
# module-level imports that the same patch declares:
#   import pandas as pd / import geopandas as gpd / from scipy import interpolate

# Geographic CRS of the long/lat columns in location.csv.
_CRS_GEOGRAPHIC = "EPSG:4326"
# Projected CRS in which the linear interpolation is carried out.
_CRS_PROJECTED = "EPSG:31287"


def _reproject(x, y, src_crs, dst_crs):
    """Transform point coordinates from src_crs to dst_crs; returns (x, y) numpy arrays."""
    points = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x, y))
    points = points.set_crs(src_crs).to_crs(dst_crs)
    return points.geometry.x.to_numpy(), points.geometry.y.to_numpy()


def _interpolate_group(signal, track):
    """Attach interpolated projected 'x'/'y' columns to one measurement's signal rows.

    Returns None when the measurement cannot be interpolated: fewer than two
    usable location fixes, or no signal sample inside the time span they cover.
    """
    # A fix without a timestamp or coordinate cannot anchor the interpolation and
    # would otherwise turn min()/max() below into NaN, discarding every sample.
    track = track.dropna(subset=['time_elapsed', 'long', 'lat'])
    if len(track) < 2:
        return None

    loc_times = track['time_elapsed'].to_numpy()

    # Keep only samples inside the covered time span so nothing is extrapolated.
    in_span = signal['time_elapsed'].between(loc_times.min(), loc_times.max())
    # Copy: the frame is a slice of the grouped data and gets new columns below.
    signal = signal[in_span].copy()
    if signal.empty:
        return None

    track_x, track_y = _reproject(
        track['long'], track['lat'], _CRS_GEOGRAPHIC, _CRS_PROJECTED
    )
    sample_times = signal['time_elapsed'].to_numpy()
    signal['x'] = interpolate.interp1d(loc_times, track_x)(sample_times)
    signal['y'] = interpolate.interp1d(loc_times, track_y)(sample_times)
    return signal


def interpolate_signal_loc(path_processed_signal, path_processed_location, path_processed_final=None):
    """
    Interpolates a gps location for every 4G signal sample that has an lte_rsrp value.

    For each measurement id, the location fixes from location.csv are projected
    to EPSG:31287, linearly interpolated over time_elapsed at the timestamps of
    the signal samples, and converted back to EPSG:4326. Samples outside the
    time span covered by the location fixes are dropped (no extrapolation).
    Measurements with no location rows, or with fewer than 2 usable fixes, are
    skipped.

    :param path_processed_signal: path of signal.csv
    :param path_processed_location: path of location.csv
    :param path_processed_final: optional path of final.csv; when given, the
        returned dataframe is also written there (default: None, nothing written)

    :return: pandas dataframe containing the retained rows of signal.csv plus
        the interpolated 'long' and 'lat' columns; empty (but with the same
        columns) when no sample could be interpolated
    """
    df_signal = pd.read_csv(path_processed_signal)
    df_loc = pd.read_csv(path_processed_location)

    # Only 4G samples that actually carry an lte_rsrp value are of interest.
    keep = (df_signal['cat_technology'] == '4G') & df_signal['lte_rsrp'].notna()
    df_signal = df_signal[keep]

    # Group by the scalar key 'id' (not ['id']) so group labels are plain ids on
    # every pandas version, and use a dict so a missing id is a cheap lookup miss
    # instead of a KeyError from get_group().
    tracks_by_id = dict(iter(df_loc.groupby('id')))
    signal_groups = df_signal.groupby('id')
    n_groups = signal_groups.ngroups

    interpolated = []
    for count, (test_id, signal) in enumerate(signal_groups, start=1):
        if count % 50 == 0:
            print('\t\t{}/{}'.format(count, n_groups))

        track = tracks_by_id.get(test_id)
        if track is None:  # measurement has no location rows at all
            continue

        group_result = _interpolate_group(signal, track)
        if group_result is not None:
            interpolated.append(group_result)

    if interpolated:
        df_signal_final = pd.concat(interpolated, ignore_index=True)
        lon, lat = _reproject(
            df_signal_final['x'], df_signal_final['y'], _CRS_PROJECTED, _CRS_GEOGRAPHIC
        )
        # The projected helper columns are not part of the result.
        df_signal_final = df_signal_final.drop(columns=['x', 'y'])
        df_signal_final['long'] = lon
        df_signal_final['lat'] = lat
    else:
        # pd.concat([]) raises; return an empty frame with the regular columns.
        df_signal_final = df_signal.iloc[:0].reset_index(drop=True).assign(
            long=pd.Series(dtype='float64'),
            lat=pd.Series(dtype='float64'),
        )

    if path_processed_final is not None:
        df_signal_final.to_csv(path_processed_final, index=False)

    return df_signal_final