Commit db2ecede authored by Sonja Tripkovic

added interpolate_signal_loc() in rtr process

parent 9387dd8e
import os.path as path import os.path as path
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import geopandas as gpd
from scipy import interpolate
def process_4G(details, path_to_store_files): def process(details, path_to_store_files):
'''
Generates 2 csv files: signal.csv and location.csv and stores them in a folder defined under path_to_store_files
:param details: details returned by the function rtr_details() from fetch.py in rtr
:param path_to_store_files: existing path at which the generated csv files will be saved to
'''
d = pd.DataFrame(details) d = pd.DataFrame(details)
path_processed_signal = path.join(path_to_store_files,'signal.csv') path_processed_signal = path.join(path_to_store_files,'signal.csv')
path_processed_location = path.join(path_to_store_files,'location.csv') path_processed_location = path.join(path_to_store_files,'location.csv')
...@@ -52,4 +60,72 @@ def process_4G(details, path_to_store_files): ...@@ -52,4 +60,72 @@ def process_4G(details, path_to_store_files):
tests_locations, ignore_index=True tests_locations, ignore_index=True
).to_csv(path_processed_location, index=False) ).to_csv(path_processed_location, index=False)
print('\t\tDone.') print('\t\tDone.')
\ No newline at end of file
def interpolate_signal_loc(path_processed_signal, path_processed_location, path_processed_final=None):
    '''
    Interpolates a GPS location for every 4G signal sample that has an lte_rsrp value.

    For each test id, the locations from location.csv are used as support points
    (keyed by time_elapsed) and the position of every signal sample is linearly
    interpolated at the sample's own time_elapsed. Signal samples outside the
    time range covered by the locations are dropped (no extrapolation). Tests
    with fewer than 2 locations, or without any location rows at all, are skipped.

    :param path_processed_signal: path of signal.csv
    :param path_processed_location: path of location.csv
    :param path_processed_final: optional path of final.csv where the returned dataframe
        is stored; nothing is written when it is None (default)
    :return df_signal_final: pandas dataframe containing the retained rows of signal.csv
        with an interpolated 'long' and 'lat' (EPSG:4326) for each row; empty (but
        with the 'long' and 'lat' columns present) when no row could be interpolated
    '''
    df_signal = pd.read_csv(path_processed_signal)
    df_loc = pd.read_csv(path_processed_location)

    # select only 4G technology and drop rows where the lte_rsrp value is missing
    usable = (df_signal['cat_technology'] == '4G') & df_signal['lte_rsrp'].notna()
    df_signal = df_signal[usable]

    # Group by the scalar column name (not a one-element list) so that the group
    # keys are plain ids on every pandas version.
    df_signal_grouped = df_signal.groupby('id')
    locations_by_id = {test_id: rows for test_id, rows in df_loc.groupby('id')}
    n_tests = df_signal_grouped.ngroups

    interpolated = []
    for count, (test_id, signal_rows) in enumerate(df_signal_grouped, start=1):
        if count % 50 == 0:
            print('\t\t{}/{}'.format(count, n_tests))

        # A test may have signal samples but no recorded location at all.
        loc_rows = locations_by_id.get(test_id)
        # we need at least 2 locations for interpolation, otherwise skip it
        if loc_rows is None or len(loc_rows) < 2:
            continue

        timeseries_loc = loc_rows['time_elapsed'].to_numpy()

        # delete those samples where we would have to extrapolate
        in_range = signal_rows['time_elapsed'].between(timeseries_loc.min(), timeseries_loc.max())
        # .copy() so that adding the x/y columns below does not write into a view of df_signal
        signal_rows = signal_rows[in_range].copy()
        if signal_rows.empty:
            continue
        timeseries_signal = signal_rows['time_elapsed'].to_numpy()

        # do interpolation in EPSG:31287
        lon_lat = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(loc_rows['long'], loc_rows['lat'])
        ).set_crs("EPSG:4326")
        x_y = lon_lat.to_crs("EPSG:31287")
        interp_x = interpolate.interp1d(timeseries_loc, x_y.geometry.x.to_numpy())
        interp_y = interpolate.interp1d(timeseries_loc, x_y.geometry.y.to_numpy())
        signal_rows['x'] = interp_x(timeseries_signal)
        signal_rows['y'] = interp_y(timeseries_signal)
        interpolated.append(signal_rows)

    if interpolated:
        df_signal_final = pd.concat(interpolated, ignore_index=True)
        # convert the interpolated positions back to long/lat in EPSG:4326
        xy = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(df_signal_final.x, df_signal_final.y)
        ).set_crs('EPSG:31287').to_crs('EPSG:4326')
        # remove x and y in EPSG:31287
        df_signal_final = df_signal_final.drop(columns=['x', 'y'])
        df_signal_final['long'] = xy.geometry.x.to_numpy()
        df_signal_final['lat'] = xy.geometry.y.to_numpy()
    else:
        # pd.concat() raises on an empty list; return an empty frame with the
        # same columns a non-empty result would have.
        df_signal_final = df_signal.iloc[0:0].reset_index(drop=True)
        df_signal_final['long'] = pd.Series(dtype='float64')
        df_signal_final['lat'] = pd.Series(dtype='float64')

    if path_processed_final is not None:
        df_signal_final.to_csv(path_processed_final, index=False)
    return df_signal_final
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment