From 546e91719bd7a311c8597a3e513f397f3a559d01 Mon Sep 17 00:00:00 2001 From: Sonja Tripkovic Date: Wed, 21 Apr 2021 08:00:06 +0200 Subject: [PATCH] added simulation.py --- measprocess/simulation.py | 103 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 measprocess/simulation.py diff --git a/measprocess/simulation.py b/measprocess/simulation.py new file mode 100644 index 0000000..13e6a80 --- /dev/null +++ b/measprocess/simulation.py @@ -0,0 +1,103 @@ +from shapely.geometry import Point, LineString, Polygon +import geopandas as gpd +import pandas as pd +import matplotlib.pyplot as plt +import numpy as np +from random import random +from scipy import linalg +import os + +def TCP_on_lines(street_series_equidistant : gpd.GeoSeries, lambdaParent : float, lambdaDaughter : float, sigmaDaughter : float)-> (gpd.GeoSeries, pd.DataFrame): + ''' + Generate TCP points along the LineStrings in geoseries. + + :param street_series_equidistant: geopandas geoseries with equidistant nodes + :param lambdaParent: density of parent points + :param lambdaDaughter: density of daughter points + :param sigmaDaughter: spread of daughter points around parent points + + :return: + geopandas geoseries containing x and y locations in 'EPSG:4326' projection + pandas dataframe with columns: x,y,clusterID,xParent,yParent in the original projection + ''' + if street_series_equidistant.crs == "EPSG:4326": + raise ValueError("Make sure to pass projected data (not long and lat).") + + parents_per_street = np.random.poisson(street_series_equidistant.length * lambdaParent) # nummber of clusters per street + + parents = [] + for j,linestring in enumerate(street_series_equidistant): + for _ in range(parents_per_street[j]): #(generate this number for each line using TCP, and then use this to place cluster centers along the lines) + pt = linestring.interpolate(random(), True) + parents.append(pt) + PARENTS = gpd.GeoSeries(parents) + daughters_per_cluster = np.random.poisson(lambdaDaughter, parents_per_street.sum()) # number of points inside each cluster + numbPoints = sum(daughters_per_cluster)# total number of points + + # Generate the (relative) locations in Cartesian coordinates by simulating independent normal variables + xx0 = np.random.normal(0, sigmaDaughter, numbPoints) # (relative) x coordinaets + yy0 = np.random.normal(0, sigmaDaughter, numbPoints) # (relative) y coordinates + + # replicate parent points (ie centres of disks/clusters) + xx = np.repeat(np.array(PARENTS.x), daughters_per_cluster) + yy = np.repeat(np.array(PARENTS.y), daughters_per_cluster) + + # translate points (ie parents points are the centres of cluster disks) + xx += xx0 + yy += yy0 + + # create pandas df (denote group (cluster) to which point (x,y) belongs to) + groups = np.arange(daughters_per_cluster.shape[0]) + col3 = np.repeat(groups, daughters_per_cluster, axis=0) + xParent = np.repeat(np.array(PARENTS.x), daughters_per_cluster, axis=0) + yParent = np.repeat(np.array(PARENTS.y), daughters_per_cluster, axis=0) + ALL = np.stack((xx,yy,col3,xParent,yParent),axis = 1) + df_all = pd.DataFrame(ALL) + df_all.columns = ['x', 'y', 'clusterID','xParent','yParent'] + + tcp_geoseries= gpd.GeoSeries(map(Point, zip(df_all.x, df_all.y))).set_crs('EPSG:31287').to_crs('EPSG:4326') + return tcp_geoseries, df_all + +def kernelSqExp(X1, X2, l=1.0, sigma_f=1.0): + ''' + Isotropic squared exponential kernel. Computes a covariance matrix from points in X1 and X2. + + :param X1: Array of m points (m x d). + :param X2: Array of n points (n x d). + + :return: Covariance matrix (m x n). + ''' + sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T) + return sigma_f**2 * np.exp(-0.5 / l**2 * sqdist) + +def ShadowFading(df_test : pd.DataFrame, df_train : pd.DataFrame, DD : float, sigma_f : float) -> (pd.DataFrame, pd.DataFrame): + ''' + Generates shadow fading values at given test and train locations by sampling from multivariate Gaussian. + One of the procedures for sampling from a multivariate Gaussian distribution is as follows: + Let X have a n-dimensional Gaussian distribution N(μ,Σ). We wish to generate a sample form X. + 1) Find a matrix A, such that Σ=A*AT. This is possible using Cholesky decomposition, where A is the Cholesky factor of Σ. + 2) Generate a vector Z=(Z1,…,Zn)T of independent, standard normal variables. (n = n_test + n_train) + 3) Let X=μ+AZ. (mean can be zero) + X in step 3 is the sample we are looking for. + + :param df_test: pandas dataframe of test points containing columns 'x' and 'y' as coordinates + :param df_test: pandas dataframe of train points containing columns 'x' and 'y' as coordinates + :param DD: deccorelation distance + :param sigma_f: signal standard deviation + ''' + TestPoints = df_test[['x', 'y']] + TrainPoints = df_train[['x', 'y']] + + AllPoints = np.append(TestPoints, TrainPoints,axis=0) + AllPoints_min = AllPoints.min(axis=0) + AllPoints = AllPoints - AllPoints_min + # compute squared exponential kernel + kernel = kernelSqExp(AllPoints,AllPoints,DD,sigma_f) + A = linalg.cholesky(np.add(kernel,1e-10*np.eye(kernel.shape[0])), lower= True) + Z = np.random.normal(0.0, 1.0, AllPoints.shape[0]) + X = A.dot(Z) + + df_test['value'] = X[0:TestPoints.shape[0]] + df_train['value'] = X[TestPoints.shape[0]:] + + return df_test, df_train \ No newline at end of file -- 2.22.0