Commit db2ecede authored by Sonja Tripkovic

added interpolate_signal_loc() in rtr process

parent 9387dd8e
import os.path as path
import pandas as pd
import numpy as np
import geopandas as gpd
from scipy import interpolate
def process_4G(details, path_to_store_files):
def process(details, path_to_store_files):
'''
Generates 2 csv files: signal.csv and location.csv and stores them in a folder defined under path_to_store_files
:param details: details returned by the function rtr_details() from fetch.py in rtr
:param path_to_store_files: existing path at which the generated csv files will be saved to
'''
d = pd.DataFrame(details)
path_processed_signal = path.join(path_to_store_files,'signal.csv')
path_processed_location = path.join(path_to_store_files,'location.csv')
......@@ -52,4 +60,72 @@ def process_4G(details, path_to_store_files):
tests_locations, ignore_index=True
).to_csv(path_processed_location, index=False)
print('\t\tDone.')
\ No newline at end of file
print('\t\tDone.')
def interpolate_signal_loc(path_processed_signal, path_processed_location, path_processed_final=None):
    '''
    Interpolates gps location values based on path_processed_location for each not NaN lte_rsrp value.

    Only rows of signal.csv with cat_technology == '4G' and a non-missing lte_rsrp are used.
    For every test id the locations are linearly interpolated over time_elapsed in
    EPSG:31287 and the result is converted back to EPSG:4326.
    A test id is skipped when it has no entry in location.csv or fewer than 2 locations.
    Signal rows whose time_elapsed lies outside the time range covered by the
    locations of the same id are dropped (no extrapolation is done).

    :param path_processed_signal: path of signal.csv
    :param path_processed_location: path of location.csv
    :param path_processed_final: path of final.csv where the returned dataframe is stored, this is optional, default is None
    :return df_signal_final: pandas dataframe containing all not NaN values from signal.csv,
    including interpolated longitude ('long') and latitude ('lat') for each value;
    it is empty (but still has the 'long' and 'lat' columns) when nothing could be interpolated
    '''
    df_signal = pd.read_csv(path_processed_signal)
    df_loc = pd.read_csv(path_processed_location)

    # select only 4G technology and remove all rows where the lte_rsrp value is missing
    usable = (df_signal['cat_technology'] == '4G') & df_signal['lte_rsrp'].notna()
    df_signal = df_signal[usable]

    # group both dfs by id; grouping by the scalar key keeps the group names plain ids
    df_signal_grouped = df_signal.groupby('id')
    df_loc_grouped = df_loc.groupby('id')
    n_groups = df_signal_grouped.ngroups
    ids_with_location = df_loc_grouped.groups

    # loop over each uuid in df_signal_grouped
    frames = []
    for count, (test_id, df_signal_selected) in enumerate(df_signal_grouped, start=1):
        if count % 50 == 0:
            print('\t\t{}/{}'.format(count, n_groups))

        # a test without any recorded location cannot be interpolated
        if test_id not in ids_with_location:
            continue
        df_loc_selected = df_loc_grouped.get_group(test_id)  # locations with the same uuid
        # we need at least 2 locations for interpolation, otherwise skip it
        if len(df_loc_selected) < 2:
            continue

        timeseries_loc = df_loc_selected['time_elapsed'].to_numpy()

        # delete those where we would have to extrapolate; copy so that the new
        # columns below are written to an independent frame and not to a slice
        in_range = df_signal_selected['time_elapsed'].between(timeseries_loc.min(), timeseries_loc.max())
        df_signal_selected = df_signal_selected[in_range].copy()
        if df_signal_selected.empty:
            continue
        timeseries_signal = df_signal_selected['time_elapsed'].to_numpy()

        # do interpolation in EPSG:31287
        lon_lat = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(df_loc_selected['long'], df_loc_selected['lat'])
        ).set_crs("EPSG:4326")
        x_y = lon_lat.to_crs("EPSG:31287")
        interp_x = interpolate.interp1d(timeseries_loc, x_y.geometry.x.to_numpy())
        interp_y = interpolate.interp1d(timeseries_loc, x_y.geometry.y.to_numpy())
        df_signal_selected['x'] = interp_x(timeseries_signal)
        df_signal_selected['y'] = interp_y(timeseries_signal)
        frames.append(df_signal_selected)

    if frames:
        df_signal_final = pd.concat(frames, ignore_index=True)
        xy = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(df_signal_final['x'], df_signal_final['y'])
        ).set_crs('EPSG:31287').to_crs('EPSG:4326')
        # remove x and y in epsg:31287
        df_signal_final = df_signal_final.drop(['x', 'y'], axis=1)
        # add long and lat in epsg:4326
        df_signal_final['long'] = xy.geometry.x.to_numpy()
        df_signal_final['lat'] = xy.geometry.y.to_numpy()
    else:
        # nothing could be interpolated: return an empty frame with the same
        # columns as a regular result instead of failing in pd.concat
        df_signal_final = df_signal.iloc[0:0].copy()
        df_signal_final['long'] = np.array([], dtype=float)
        df_signal_final['lat'] = np.array([], dtype=float)

    if path_processed_final is not None:
        df_signal_final.to_csv(path_processed_final, index=False)
    return df_signal_final
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment