1 """Helper functions for creating synthetic data
2
3 Robert Clewley, 2006, 2007.
4
5 """
6
7 from PyDSTool import *
8 from numpy import random, array, dot, zeros, transpose
9 from numpy.linalg import norm, inv
10
11 _generators = ['generate_swirl', 'generate_discspiral', 'generate_hypercube',
12 'generate_spiral']
13
14 _hypercuboid_utils = ['sup_norm', 'inf_norm', 'maxs', 'mins',
15 'max_euclidean_distance']
16
17 __all__ = _generators + _hypercuboid_utils
18
19
20
21 -def generate_swirl(N, L, zmin, zmax, cycle_rate, bias_x, bias_y, roll_num=0,
22 noise=0, fromcentre=True):
23 """Generate N points on a 3D 'swirl' (a cyclic trajectory on a
24 1 x L "swiss roll" manifold).
25
26 cycle_rate is measured in radians.
27 roll_num is the number of full rolls in the swiss roll.
28
29 The origin is at the centre of the set unless fromcentre=False.
30 """
31 X = zeros((N, 3), float)
32 r = 0.5
33 assert L > 0
34 assert zmax >= 1
35 assert zmin > 0
36 assert zmax > zmin
37 zscale = (zmax - zmin)/L
38 def roll(x):
39 r = zmax - x*zscale
40 rho = x/L*(roll_num*2*pi)
41 return (r*cos(rho), r*sin(rho))
42 theta = 0
43 for i in xrange(N):
44 theta += cycle_rate
45 x = r*cos(theta)+bias_x*i
46 x_rolled, z_rolled = roll(x)
47 X[i,:] = array([x_rolled + random.normal(0,noise),
48 r*sin(theta) + bias_y*i + random.normal(0,noise),
49 z_rolled + random.normal(0,noise)])
50 return X
51
52
55 """Generate N1 points on a planar disc and N2 points on up to two spirals
56 attached to it, also in the plane. The data are embedded in an ambient
57 space of dimension D.
58 The cycle_rate (radians per point) determines how much of the spiral is
59 created.
60 num_spirals can be 0, 1, or 2
61 """
62 assert D > 1
63 assert num_spirals in [0,1,2]
64 X = zeros((N1+num_spirals*N2, D), float)
65
66 assert radius >= 1
67 X[0:N1,:2] = generate_ball(N1, 2, radius)
68 for i in range(N1):
69 X[i,2:D] = array([random.normal(0,noise)]*(D-2), float)
70
71 theta = sqrt(radius*radius-1)
72 for i in xrange(N2):
73 theta += cycle_rate
74 r = sqrt(theta*theta+0.8)
75 X[N1+i,:] = array([r*cos(theta)+random.normal(0,noise),
76 r*sin(theta)+random.normal(0,noise)] + \
77 [random.normal(0,noise)]*(D-2))
78 if num_spirals == 2:
79 X[N1+N2+i,:] = array([r*cos(theta+pi)+random.normal(0,noise),
80 r*sin(theta+pi)+random.normal(0,noise)] + \
81 [random.normal(0,noise)]*(D-2))
82 return X
83
84
85 -def generate_spiral(N, D, radius, cycle_rate, expand_rate=1.,
86 num_spirals=1, noise=0):
87 """Generate N points on up to two spirals in the plane.
88 The data are embedded in an ambient space of dimension D.
89 The cycle_rate (radians per point) determines how much of the spiral is
90 created.
91 radius is the starting radius of the spiral.
92 num_spirals can be 0, 1, or 2
93 """
94 assert D > 1
95 assert num_spirals in [0,1,2]
96 X = zeros((num_spirals*N, D), float)
97 theta = sqrt(radius*radius-1)
98 for i in xrange(N):
99 theta += cycle_rate
100 r = sqrt(theta*theta+0.8)*expand_rate
101 X[i,:] = array([r*cos(theta)+random.normal(0,noise),
102 r*sin(theta)+random.normal(0,noise)] + \
103 [random.normal(0,noise)]*(D-2))
104 if num_spirals == 2:
105 X[N+i,:] = array([r*cos(theta+pi)+random.normal(0,noise),
106 r*sin(theta+pi)+random.normal(0,noise)] + \
107 [random.normal(0,noise)]*(D-2))
108 return X
109
111 """Generate a length-N uniformly-distributed random set of data points in a
112 D-dimensional hypercube having side-length given by third parameter.
113
114 The origin is at the centre of the set unless fromcentre=False.
115 """
116 X = zeros((N, D), float)
117 if fromcentre:
118 for i in xrange(N):
119 X[i,:] = array([random.uniform(-length/2., length/2.) for j in xrange(D)],
120 float)
121 else:
122 for i in xrange(N):
123 X[i,:] = array([random.uniform(0, length) for j in xrange(D)],
124 float)
125 return X
126
128 """Generate a length-N uniformly-distributed random set of data points in a
129 D-dimensional ball of radius given by the third parameter. Points are found
130 by elimination of points from hypercubes and thus this method is not
131 recommended for large D!
132 """
133 X = zeros((N, D), float)
134 twopi = 2*pi
135 assert D > 1
136 foundpts = 0
137 outsidepts = 0
138 while foundpts < N:
139 hcube = generate_hypercube(N, D, 2*radius)
140 for x in hcube:
141
142 if norm(x) < radius:
143 X[foundpts,:] = x
144 foundpts += 1
145
146 if foundpts == N:
147 break
148 else:
149 outsidepts += 1
150
151 print "Number of points outside ball that were discarded = ", outsidepts
152 return X
153
154
155
156
157
158
160 """sup norm for data distributed in a hypercuboid with given lengths."""
161 return max(maxs(p), lengths)
162
164 """inf norm for data distributed in a hypercuboid with given lengths."""
165 return min(mins(p), lengths)
166
167 -def maxs(p, lengths=None):
168 """Return ordered array of max distances from a point to the edges of
169 a hypercuboid.
170 """
171 if lengths is None:
172 lengths = [1.] * len(p)
173 a = array([max([p[i],lengths[i]-p[i]]) for i in xrange(len(p))])
174 a.sort()
175 return a
176
177 -def mins(p, lengths=None):
178 """Return ordered array of min distances from a point to the edges of
179 a hypercuboid.
180 """
181 if lengths is None:
182 lengths = [1.] * len(p)
183 a = array([min([p[i],lengths[i]-p[i]]) for i in xrange(len(p))])
184 a.sort()
185 return a
186
188 """Returns the largest Euclidean distance possible in a D-dimensional
189 hypercuboid with given side-lengths."""
190 return norm(lengths, 2)
191