added simulation.py

546e9171 · Sonja Tripkovic · 07791eff · 546e9171
Commit 546e9171 authored Apr 21, 2021 by Sonja Tripkovic
Hide whitespace changes
Inline Side-by-side

Showing with 103 additions and 0 deletions

simulation.py measprocess/simulation.py +103 -0

No files found.
--- a/measprocess/simulation.py
+++ b/measprocess/simulation.py
+from shapely.geometry import Point, LineString, Polygon
+import geopandas as gpd
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+from random import random
+from scipy import linalg
+import os
+
+def TCP_on_lines(street_series_equidistant : gpd.GeoSeries, lambdaParent : float, lambdaDaughter : float, sigmaDaughter : float)-> (gpd.GeoSeries, pd.DataFrame):
+    '''
+    Generate TCP points along the LineStrings in geoseries.
+
+    :param street_series_equidistant: geopandas geoseries with equidistant nodes
+    :param lambdaParent: density of parent points
+    :param lambdaDaughter: density of daughter points
+    :param sigmaDaughter: spread of daughter points around parent points
+
+    :return: 
+        geopandas geoseries containing x and y locations in 'EPSG:4326' projection
+        pandas dataframe with columns: x,y,clusterID,xParent,yParent in the original projection
+    '''
+    if street_series_equidistant.crs == "EPSG:4326":
+        raise ValueError("Make sure to pass projected data (not long and lat).")
+
+    parents_per_street = np.random.poisson(street_series_equidistant.length * lambdaParent) # nummber of clusters per street
+    
+    parents = []
+    for j,linestring in enumerate(street_series_equidistant): 
+        for _ in range(parents_per_street[j]): #(generate this number for each line using TCP, and then use this to place cluster centers along the lines)
+            pt = linestring.interpolate(random(), True)
+            parents.append(pt)
+    PARENTS =  gpd.GeoSeries(parents)
+    daughters_per_cluster = np.random.poisson(lambdaDaughter, parents_per_street.sum()) # number of points inside each cluster
+    numbPoints = sum(daughters_per_cluster)# total number of points
+
+    # Generate the (relative) locations in Cartesian coordinates by simulating independent normal variables
+    xx0 = np.random.normal(0, sigmaDaughter, numbPoints) # (relative) x coordinaets
+    yy0 = np.random.normal(0, sigmaDaughter, numbPoints) # (relative) y coordinates
+
+    # replicate parent points (ie centres of disks/clusters)
+    xx = np.repeat(np.array(PARENTS.x), daughters_per_cluster)
+    yy = np.repeat(np.array(PARENTS.y), daughters_per_cluster)
+
+    # translate points (ie parents points are the centres of cluster disks)
+    xx += xx0
+    yy += yy0
+
+    # create pandas df (denote group (cluster) to which point (x,y) belongs to) 
+    groups = np.arange(daughters_per_cluster.shape[0])
+    col3 = np.repeat(groups, daughters_per_cluster, axis=0)
+    xParent = np.repeat(np.array(PARENTS.x), daughters_per_cluster, axis=0)
+    yParent = np.repeat(np.array(PARENTS.y), daughters_per_cluster, axis=0)
+    ALL = np.stack((xx,yy,col3,xParent,yParent),axis = 1)
+    df_all = pd.DataFrame(ALL)
+    df_all.columns = ['x', 'y', 'clusterID','xParent','yParent']
+    
+    tcp_geoseries= gpd.GeoSeries(map(Point, zip(df_all.x, df_all.y))).set_crs('EPSG:31287').to_crs('EPSG:4326')
+    return tcp_geoseries, df_all 
+
+def kernelSqExp(X1, X2, l=1.0, sigma_f=1.0):
+    '''
+    Isotropic squared exponential kernel. Computes a covariance matrix from points in X1 and X2. 
+
+    :param X1: Array of m points (m x d).
+    :param X2: Array of n points (n x d).
+   
+    :return: Covariance matrix (m x n).
+    '''
+    sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)
+    return sigma_f**2 * np.exp(-0.5 / l**2 * sqdist)
+
+def ShadowFading(df_test : pd.DataFrame, df_train : pd.DataFrame, DD : float, sigma_f : float) -> (pd.DataFrame, pd.DataFrame):
+    '''
+    Generates shadow fading values at given test and train locations by sampling from multivariate Gaussian.
+    One of the procedures for sampling from a multivariate Gaussian distribution is as follows:
+    Let X have a n-dimensional Gaussian distribution N(μ,Σ). We wish to generate a sample form X.
+    1) Find a matrix A, such that Σ=A*AT. This is possible using Cholesky decomposition, where A is the Cholesky factor of Σ.
+    2) Generate a vector Z=(Z1,…,Zn)T of independent, standard normal variables. (n = n_test + n_train)
+    3) Let X=μ+AZ. (mean can be zero)
+    X in step 3 is the sample we are looking for. 
+
+    :param df_test: pandas dataframe of test points containing columns 'x' and 'y' as coordinates
+    :param df_test: pandas dataframe of train points containing columns 'x' and 'y' as coordinates
+    :param DD: deccorelation distance
+    :param sigma_f: signal standard deviation
+    '''
+    TestPoints = df_test[['x', 'y']]
+    TrainPoints = df_train[['x', 'y']]
+    
+    AllPoints = np.append(TestPoints, TrainPoints,axis=0)
+    AllPoints_min = AllPoints.min(axis=0) 
+    AllPoints = AllPoints - AllPoints_min 
+    # compute squared exponential kernel
+    kernel = kernelSqExp(AllPoints,AllPoints,DD,sigma_f) 
+    A = linalg.cholesky(np.add(kernel,1e-10*np.eye(kernel.shape[0])), lower= True)
+    Z = np.random.normal(0.0, 1.0, AllPoints.shape[0])
+    X = A.dot(Z)
+
+    df_test['value'] = X[0:TestPoints.shape[0]]
+    df_train['value'] = X[TestPoints.shape[0]:]
+
+    return df_test, df_train
\ No newline at end of file