Commit 546e9171 authored by Sonja Tripkovic's avatar Sonja Tripkovic

added simulation.py

parent 07791eff
from shapely.geometry import Point, LineString, Polygon
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from random import random
from scipy import linalg
import os
def TCP_on_lines(street_series_equidistant : gpd.GeoSeries, lambdaParent : float, lambdaDaughter : float, sigmaDaughter : float)-> (gpd.GeoSeries, pd.DataFrame):
'''
Generate TCP points along the LineStrings in geoseries.
:param street_series_equidistant: geopandas geoseries with equidistant nodes
:param lambdaParent: density of parent points
:param lambdaDaughter: density of daughter points
:param sigmaDaughter: spread of daughter points around parent points
:return:
geopandas geoseries containing x and y locations in 'EPSG:4326' projection
pandas dataframe with columns: x,y,clusterID,xParent,yParent in the original projection
'''
if street_series_equidistant.crs == "EPSG:4326":
raise ValueError("Make sure to pass projected data (not long and lat).")
parents_per_street = np.random.poisson(street_series_equidistant.length * lambdaParent) # nummber of clusters per street
parents = []
for j,linestring in enumerate(street_series_equidistant):
for _ in range(parents_per_street[j]): #(generate this number for each line using TCP, and then use this to place cluster centers along the lines)
pt = linestring.interpolate(random(), True)
parents.append(pt)
PARENTS = gpd.GeoSeries(parents)
daughters_per_cluster = np.random.poisson(lambdaDaughter, parents_per_street.sum()) # number of points inside each cluster
numbPoints = sum(daughters_per_cluster)# total number of points
# Generate the (relative) locations in Cartesian coordinates by simulating independent normal variables
xx0 = np.random.normal(0, sigmaDaughter, numbPoints) # (relative) x coordinaets
yy0 = np.random.normal(0, sigmaDaughter, numbPoints) # (relative) y coordinates
# replicate parent points (ie centres of disks/clusters)
xx = np.repeat(np.array(PARENTS.x), daughters_per_cluster)
yy = np.repeat(np.array(PARENTS.y), daughters_per_cluster)
# translate points (ie parents points are the centres of cluster disks)
xx += xx0
yy += yy0
# create pandas df (denote group (cluster) to which point (x,y) belongs to)
groups = np.arange(daughters_per_cluster.shape[0])
col3 = np.repeat(groups, daughters_per_cluster, axis=0)
xParent = np.repeat(np.array(PARENTS.x), daughters_per_cluster, axis=0)
yParent = np.repeat(np.array(PARENTS.y), daughters_per_cluster, axis=0)
ALL = np.stack((xx,yy,col3,xParent,yParent),axis = 1)
df_all = pd.DataFrame(ALL)
df_all.columns = ['x', 'y', 'clusterID','xParent','yParent']
tcp_geoseries= gpd.GeoSeries(map(Point, zip(df_all.x, df_all.y))).set_crs('EPSG:31287').to_crs('EPSG:4326')
return tcp_geoseries, df_all
def kernelSqExp(X1, X2, l=1.0, sigma_f=1.0):
'''
Isotropic squared exponential kernel. Computes a covariance matrix from points in X1 and X2.
:param X1: Array of m points (m x d).
:param X2: Array of n points (n x d).
:return: Covariance matrix (m x n).
'''
sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)
return sigma_f**2 * np.exp(-0.5 / l**2 * sqdist)
def ShadowFading(df_test : pd.DataFrame, df_train : pd.DataFrame, DD : float, sigma_f : float) -> (pd.DataFrame, pd.DataFrame):
'''
Generates shadow fading values at given test and train locations by sampling from multivariate Gaussian.
One of the procedures for sampling from a multivariate Gaussian distribution is as follows:
Let X have a n-dimensional Gaussian distribution N(μ,Σ). We wish to generate a sample form X.
1) Find a matrix A, such that Σ=A*AT. This is possible using Cholesky decomposition, where A is the Cholesky factor of Σ.
2) Generate a vector Z=(Z1,…,Zn)T of independent, standard normal variables. (n = n_test + n_train)
3) Let X=μ+AZ. (mean can be zero)
X in step 3 is the sample we are looking for.
:param df_test: pandas dataframe of test points containing columns 'x' and 'y' as coordinates
:param df_test: pandas dataframe of train points containing columns 'x' and 'y' as coordinates
:param DD: deccorelation distance
:param sigma_f: signal standard deviation
'''
TestPoints = df_test[['x', 'y']]
TrainPoints = df_train[['x', 'y']]
AllPoints = np.append(TestPoints, TrainPoints,axis=0)
AllPoints_min = AllPoints.min(axis=0)
AllPoints = AllPoints - AllPoints_min
# compute squared exponential kernel
kernel = kernelSqExp(AllPoints,AllPoints,DD,sigma_f)
A = linalg.cholesky(np.add(kernel,1e-10*np.eye(kernel.shape[0])), lower= True)
Z = np.random.normal(0.0, 1.0, AllPoints.shape[0])
X = A.dot(Z)
df_test['value'] = X[0:TestPoints.shape[0]]
df_train['value'] = X[TestPoints.shape[0]:]
return df_test, df_train
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment