libROM  v1.0
Data-driven physical simulation library
STSampling.cpp
1 
11 #include "linalg/Matrix.h"
12 #include "Utilities.h"
13 #include "mpi.h"
14 #include <cmath>
15 #include <map>
16 #include <set>
17 
18 #include "STSampling.h"
19 
20 using namespace std;
21 
22 namespace CAROM {
23 
24 // This function implements Algorithm 2 of Choi and Carlberg 2019.
25 // All spatial indices are used here.
26 // Spatial basis s_basis is distributed, of size (number of spatial DOFs) x (number of basis vectors)
27 // Temporal basis t_basis is stored in its entirety on each process, of size (number of temporal DOFs) x (number of basis vectors)
28 // In general, there may be multiple temporal basis vectors associated with each spatial basis vector, but here we assume for now
29 // that there is only one, i.e. column j of s_basis corresponds to column j of t_basis.
30 void SampleTemporalIndices(const Matrix* s_basis,
31  const Matrix* t_basis,
32  const int num_f_basis_vectors_used,
33  int* t_samples,
34  const int myid,
35  const int num_procs,
36  const int num_samples_req,
37  const bool excludeFinalTime)
38 {
39  CAROM_VERIFY(t_basis->distributed());
40 
41  // Get the number of basis vectors and the size of each basis vector.
42  CAROM_VERIFY(0 < num_f_basis_vectors_used
43  && num_f_basis_vectors_used <= s_basis->numColumns()
44  && num_f_basis_vectors_used <= t_basis->numColumns());
45  CAROM_VERIFY(num_samples_req > 0);
46  const int num_basis_vectors =
47  std::min(num_f_basis_vectors_used, s_basis->numColumns());
48  const int num_samples = num_samples_req;
49  const int numExcluded = excludeFinalTime ? 1 : 0;
50  CAROM_VERIFY(num_samples <= t_basis->numRows() - numExcluded);
51  const int s_size = s_basis->numRows();
52  const int t_size = t_basis->numRows();
53 
54  const int ns_mod_nr = num_samples % num_basis_vectors;
55  int ns = 0;
56 
57  // The small matrix inverted by the algorithm. We'll allocate the largest
58  // matrix we'll need and set its size at each step in the algorithm.
59  Matrix M(num_basis_vectors, num_basis_vectors,
60  false); // TODO: is this big enough to avoid reallocations?
61 
62  std::set<int> samples; // Temporal samples, identical on all processes
63 
64  std::vector<double> error(s_size * t_size);
65  std::vector<double> MZphi(num_basis_vectors);
66 
67  // Initialize error as the first space-time basis vector
68  for (int s=0; s<s_size; ++s)
69  {
70  for (int t=0; t<t_size; ++t)
71  error[t + (s*t_size)] = s_basis->item(s, 0) * t_basis->item(t, 0);
72  }
73 
74  for (int i=0; i<num_basis_vectors; ++i)
75  {
76  CAROM_VERIFY(samples.size() == ns);
77  if (i > 0)
78  {
79  // Set error vector to (I - [\phi_0, ..., \phi_{i-1}] (Z [\phi_0, ..., \phi_{i-1}])^+ Z) \phi_i
80  // where \phi_j is the j-th space-time basis vector (tensor product of columns j of s_basis and t_basis).
81 
82  // First, set M to Z [\phi_0, ..., \phi_{i-1}], where Z selects all spatial indices and the temporal indices in `samples`
83  M.setSize(ns * s_size, i);
84 
85  int ti = 0;
86  for (auto t : samples) // loop over ns values
87  {
88  for (int s = 0; s < s_size; ++s)
89  {
90  const int row = ti + (s*ns);
91  for (int col = 0; col < i; ++col)
92  {
93  M.item(row, col) = s_basis->item(s, col) * t_basis->item(t, col);
94  }
95  }
96 
97  ti++;
98  }
99 
100  // Compute the pseudo-inverse of M, storing its transpose.
101  M.transposePseudoinverse();
102 
103  // TODO: in parallel, it seems this matrix should be distributed, and the pseudo-inverse needs to be computed in parallel.
104  // In DEIM and GNAT, we gather the non-local rows that have been sampled, but here we use all spatial rows, so this will be
105  // more expensive. Fortunately, the matrix to be inverted in the pseudo-inverse computation will still be small.
106  // Hopefully M just needs to be declared as `distributed`.
107 
108  // Multiply Z\phi_i by the transpose of M, which is (Z [\phi_0, ..., \phi_{i-1}])^+.
109  // The result is a vector of length i, stored in MZphi.
110  for (int j = 0; j < i; ++j) MZphi[j] = 0.0;
111 
112  ti = 0;
113  for (auto t : samples) // loop over ns values
114  {
115  for (int s = 0; s < s_size; ++s)
116  {
117  const int row = ti + (s*ns);
118  const double phi_i_s_t = s_basis->item(s, i) * t_basis->item(t,
119  i); // \phi_i(s,t)
120 
121  for (int col = 0; col < i; ++col)
122  {
123  MZphi[col] += M.item(row, col) * phi_i_s_t;
124  }
125  }
126 
127  ti++;
128  }
129 
130  // Initialize error as space-time basis vector i
131  for (int s=0; s<s_size; ++s)
132  {
133  for (int t=0; t<t_size; ++t)
134  error[t + (s*t_size)] = s_basis->item(s, i) * t_basis->item(t, i);
135  }
136 
137  // Multiply MZphi by [\phi_0, ..., \phi_{i-1}], subtracting the result from error
138  for (int s=0; s<s_size; ++s)
139  {
140  for (int t=0; t<t_size; ++t)
141  {
142  for (int j=0; j<i; ++j)
143  error[t + (s*t_size)] -= MZphi[j] * s_basis->item(s, j) * t_basis->item(t, j);
144  }
145  }
146 
147  // Now error is set.
148  }
149 
150  const int nsi = i < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
151  num_samples / num_basis_vectors;
152 
153  for (int j=0; j<nsi; ++j)
154  {
155  // Find the temporal index not sampled yet that has the maximum l2 spatial error norm
156 
157  double maxNorm = -1.0;
158  int tmax = 0;
159  for (int t=0; t<t_size - numExcluded; ++t)
160  {
161  std::set<int>::const_iterator found = samples.find(t);
162  if (found != samples.end())
163  continue;
164 
165  double norm2 = 0.0;
166  for (int s=0; s<s_size; ++s)
167  {
168  norm2 += error[t + (s*t_size)] * error[t + (s*t_size)];
169  }
170 
171  // TODO: this Allreduce is expensive for every t. Can this be improved?
172  double globalNorm2 = 0.0;
173  MPI_Allreduce(&norm2, &globalNorm2, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
174  if (globalNorm2 > maxNorm)
175  {
176  maxNorm = globalNorm2;
177  tmax = t;
178  }
179  }
180 
181  samples.insert(tmax);
182  }
183 
184  ns += nsi;
185  }
186 
187  CAROM_VERIFY(num_samples == ns);
188 
189  int ti = 0;
190  for (auto t : samples)
191  {
192  t_samples[ti] = t;
193  ti++;
194  }
195 }
196 
197 // This function implements Algorithm 3 of Choi and Carlberg 2019.
198 // Temporal samples are input.
199 // Spatial basis s_basis is distributed, of size (number of spatial DOFs) x (number of basis vectors)
200 // Temporal basis t_basis is stored in its entirety on each process, of size (number of temporal DOFs) x (number of basis vectors)
201 // In general, there may be multiple temporal basis vectors associated with each spatial basis vector, but here we assume for now
202 // that there is only one, i.e. column j of s_basis corresponds to column j of t_basis.
203 void SampleSpatialIndices(const Matrix* s_basis,
204  const Matrix* t_basis,
205  const int num_f_basis_vectors_used,
206  const int num_t_samples,
207  int* t_samples,
208  int* s_sampled_row,
209  int* s_sampled_rows_per_proc,
210  Matrix& f_basis_sampled,
211  const int myid,
212  const int num_procs,
213  const int num_samples_req)
214 {
215  // This algorithm determines the rows of f that should be sampled, the
216  // processor that owns each sampled row, and fills f_basis_sampled with
217  // the sampled rows of the basis of the RHS.
218 
219  // Create an MPI_Datatype for the RowInfo struct.
220  MPI_Datatype MaxRowType, oldtypes[2];
221  int blockcounts[2];
222  MPI_Aint offsets[2], extent, lower_bound;
223  MPI_Status stat;
224  offsets[0] = 0;
225  oldtypes[0] = MPI_DOUBLE;
226  blockcounts[0] = 1;
227 
228  /*
229  MPI_Type_extent is deprecated in MPI-2 and removed in
230  MPI-3. MPI_Type_extent has been replaced by MPI_Type_get_extent.
231  */
232 
233 #if (MPI_VERSION == 1)
234  MPI_Type_extent(MPI_DOUBLE, &extent);
235 #else
236  MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
237 #endif
238 
239  offsets[1] = extent;
240  oldtypes[1] = MPI_INT;
241  blockcounts[1] = 2;
242 
243  /*
244  MPI_Type_struct is deprecated in MPI-2 and removed in
245  MPI-3. MPI_Type_struct has been replaced by MPI_Type_create_struct.
246  */
247 
248 #if (MPI_VERSION == 1)
249  MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
250 #else
251  MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
252 #endif
253 
254  MPI_Type_commit(&MaxRowType);
255 
256  // Create an MPI_Op for the RowInfoMax function.
257  MPI_Op RowInfoOp;
258  MPI_Op_create((MPI_User_function*)RowInfoMax, true, &RowInfoOp);
259 
260  //CAROM_VERIFY(!t_basis->distributed());
261 
262  // Get the number of basis vectors and the size of each basis vector.
263  CAROM_VERIFY(0 < num_f_basis_vectors_used
264  && num_f_basis_vectors_used <= s_basis->numColumns()
265  && num_f_basis_vectors_used <= t_basis->numColumns());
266  CAROM_VERIFY(num_samples_req >
267  0); // TODO: just replace num_samples with num_samples_req
268  const int num_basis_vectors =
269  std::min(num_f_basis_vectors_used, s_basis->numColumns());
270  //const int num_samples = num_samples_req > 0 ? num_samples_req : num_basis_vectors;
271  const int num_samples = num_samples_req;
272  CAROM_VERIFY(num_basis_vectors <= num_samples
273  && num_samples <= s_basis->numRows());
274  CAROM_VERIFY(num_samples == f_basis_sampled.numRows()
275  && num_basis_vectors == f_basis_sampled.numColumns());
276  CAROM_VERIFY(!f_basis_sampled.distributed());
277  const int s_size = s_basis->numRows();
278  const int t_size = t_basis->numRows();
279 
280  const int ns_mod_nr = num_samples % num_basis_vectors;
281  int ns = 0;
282 
283  // The small matrix inverted by the algorithm. We'll allocate the largest
284  // matrix we'll need and set its size at each step in the algorithm.
285  Matrix M(num_basis_vectors, num_basis_vectors,
286  false); // TODO: is this big enough to avoid reallocations?
287 
288  // TODO: change these variable names
289  std::vector<std::set<int> > proc_sampled_f_row(num_procs);
290  std::vector<std::map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
291  int num_f_basis_cols =
292  f_basis_sampled.numColumns(); // TODO: just use num_basis_vectors
293  Matrix tmp_fs(f_basis_sampled.numRows(),
294  num_f_basis_cols,
295  f_basis_sampled.distributed()); // TODO: should this be distributed?
296 
297  // TODO: change these variable names
298  RowInfo f_bv_max_local, f_bv_max_global;
299 
300  std::vector<double> error(s_size * t_size);
301  std::vector<double> MZphi(num_basis_vectors);
302  std::vector<double> sampled_row(num_basis_vectors);
303 
304  // Initialize error as the first space-time basis vector
305  for (int s=0; s<s_size; ++s)
306  {
307  for (int t=0; t<t_size; ++t)
308  error[t + (s*t_size)] = s_basis->item(s, 0) * t_basis->item(t, 0);
309  }
310 
311  for (int i=0; i<num_basis_vectors; ++i)
312  {
313  if (i > 0)
314  {
315  // Set error vector to (I - [\phi_0, ..., \phi_{i-1}] (Z [\phi_0, ..., \phi_{i-1}])^+ Z) \phi_i
316  // where \phi_j is the j-th space-time basis vector (tensor product of columns j of s_basis and t_basis).
317 
318  // First, set M to Z [\phi_0, ..., \phi_{i-1}], where Z selects spatial indices in `samples` and the temporal indices in `t_samples`
319 
320  /* // Distributed version: should this be used?
321  M.setSize(proc_sampled_f_row[myid].size() * num_t_samples, i);
322 
323  int si = 0;
324  for (auto s : proc_sampled_f_row[myid]) // loop over local spatial sample indices
325  {
326  for (int ti = 0; ti < num_t_samples; ++ti)
327  {
328  const int t = t_samples[ti];
329  const int row = ti + (si*num_t_samples);
330  for (int col = 0; col < i; ++col)
331  {
332  M.item(row, col) = s_basis->item(s, col) * t_basis->item(t, col);
333  }
334  }
335 
336  si++;
337  }
338  */
339 
340  // Set M to be the global sampled matrix and invert on each process.
341  M.setSize(ns * num_t_samples, i);
342  for (int k = 0; k < i; ++k)
343  for (int j = 0; j < ns; ++j)
344  {
345  for (int ti = 0; ti < num_t_samples; ++ti)
346  {
347  const int t = t_samples[ti];
348  const int row = ti + (j*num_t_samples);
349 
350  M.item(row, k) = tmp_fs.item(j, k) * t_basis->item(t, k);
351  }
352  }
353 
354  // Compute the pseudo-inverse of M, storing its transpose.
355  M.transposePseudoinverse();
356 
357  // Multiply Z\phi_i by the transpose of M, which is (Z [\phi_0, ..., \phi_{i-1}])^+.
358  // The result is a vector of length i, stored in MZphi.
359  for (int j = 0; j < i; ++j) MZphi[j] = 0.0;
360 
361  /*
362  // Distributed version: should this be used?
363 
364  // TODO: in parallel, it seems this matrix should be distributed, and the pseudo-inverse needs to be computed in parallel.
365  // In DEIM and GNAT, we gather the non-local spatial rows that have been sampled. This is so far just a serial implementation.
366 
367  si = 0;
368  for (auto s : proc_sampled_f_row[myid]) // loop over local spatial sample indices
369  {
370  for (int ti = 0; ti < num_t_samples; ++ti)
371  {
372  const int t = t_samples[ti];
373  const int row = ti + (si*num_t_samples);
374  const double phi_i_s_t = s_basis->item(s, i) * t_basis->item(t, i); // \phi_i(s,t)
375  for (int col = 0; col < i; ++col)
376  {
377  MZphi[col] += M.item(row, col) * phi_i_s_t;
378  }
379  }
380 
381  si++;
382  }
383 
384  CAROM_VERIFY(si * num_t_samples == M.numRows());
385  */
386 
387  for (int j = 0; j < ns; ++j)
388  {
389  for (int ti = 0; ti < num_t_samples; ++ti)
390  {
391  const int t = t_samples[ti];
392  const int row = ti + (j*num_t_samples);
393 
394  const double phi_i_s_t = s_basis->item(j, i) * t_basis->item(t,
395  i); // \phi_i(s,t)
396 
397  for (int k = 0; k < i; ++k)
398  MZphi[k] += M.item(row, k) * phi_i_s_t;
399  }
400  }
401 
402  // Initialize error as space-time basis vector i
403  for (int s=0; s<s_size; ++s)
404  {
405  for (int t=0; t<t_size; ++t)
406  error[t + (s*t_size)] = s_basis->item(s, i) * t_basis->item(t, i);
407  }
408 
409  // Multiply MZphi by [\phi_0, ..., \phi_{i-1}], subtracting the result from error
410  for (int s=0; s<s_size; ++s)
411  {
412  for (int t=0; t<t_size; ++t)
413  {
414  for (int j=0; j<i; ++j)
415  error[t + (s*t_size)] -= MZphi[j] * s_basis->item(s, j) * t_basis->item(t, j);
416  }
417  }
418 
419  // Now error is set.
420  }
421 
422  const int nsi = i < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
423  num_samples / num_basis_vectors;
424 
425  for (int j=0; j<nsi; ++j)
426  {
427  // Find the spatial index not sampled yet that has the maximum l2 temporal error norm.
428  // First find the process-local maximum.
429 
430  double maxNorm = -1.0;
431  int smax = 0;
432  for (int s=0; s<s_size; ++s)
433  {
434  std::set<int>::const_iterator found = proc_sampled_f_row[myid].find(s);
435  if (found != proc_sampled_f_row[myid].end())
436  continue;
437 
438  double norm2 = 0.0;
439  for (int t=0; t<t_size; ++t)
440  {
441  norm2 += error[t + (s*t_size)] * error[t + (s*t_size)];
442  }
443 
444  if (norm2 > maxNorm)
445  {
446  maxNorm = norm2;
447  smax = s;
448  }
449  }
450 
451  // The local maximum with respect spatial indices is (maxNorm, smax).
452  // Now find the global maximum.
453  //RowInfo f_bv_max_local, f_bv_max_global;
454  f_bv_max_local.row_val = maxNorm;
455  f_bv_max_local.row = smax;
456  f_bv_max_local.proc = myid;
457 
458  MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
459  MaxRowType, RowInfoOp, MPI_COMM_WORLD);
460 
461  proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
462  proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = ns + j;
463 
464  // Now get the sampled row of the spatial basis.
465  if (f_bv_max_global.proc == myid) {
466  for (int k = 0; k < num_basis_vectors; ++k) {
467  sampled_row[k] = s_basis->item(f_bv_max_global.row, k);
468  }
469  }
470  MPI_Bcast(sampled_row.data(), num_basis_vectors, MPI_DOUBLE,
471  f_bv_max_global.proc, MPI_COMM_WORLD);
472  // Now add the sampled row of the basis to tmp_fs.
473  for (int k = 0; k < num_basis_vectors; ++k) {
474  tmp_fs.item(ns+j, k) = sampled_row[k];
475  }
476  }
477 
478  ns += nsi;
479  }
480 
481  CAROM_VERIFY(num_samples == ns);
482 
483  // Fill f_sampled_row, and f_sampled_rows_per_proc. Unscramble tmp_fs into
484  // f_basis_sampled.
485  int idx = 0;
486  for (int i = 0; i < num_procs; ++i) {
487  std::set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
488  std::map<int, int>& this_proc_f_row_to_tmp_fs_row =
489  proc_f_row_to_tmp_fs_row[i];
490  s_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
491  for (std::set<int>::iterator j = this_proc_sampled_f_row.begin();
492  j != this_proc_sampled_f_row.end(); ++j) {
493  int this_f_row = *j;
494  s_sampled_row[idx] = this_f_row;
495  int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
496  for (int col = 0; col < num_f_basis_cols; ++col) {
497  f_basis_sampled.item(idx, col) = tmp_fs.item(tmp_fs_row, col);
498  }
499  ++idx;
500  }
501  }
502 
503  CAROM_VERIFY(num_samples == idx);
504 
505  // Free the MPI_Datatype and MPI_Op.
506  MPI_Type_free(&MaxRowType);
507  MPI_Op_free(&RowInfoOp);
508 }
509 
510 void SpaceTimeSampling(const Matrix* s_basis,
511  const Matrix* t_basis,
512  const int num_f_basis_vectors_used,
513  std::vector<int>& t_samples,
514  int* f_sampled_row,
515  int* f_sampled_rows_per_proc,
516  Matrix& s_basis_sampled,
517  const int myid,
518  const int num_procs,
519  const int num_t_samples_req,
520  const int num_s_samples_req,
521  const bool excludeFinalTime)
522 {
523  // There are multiple possible algorithms for space-time sampling. For now, we just implement
524  // one algorithm, but in the future there could be options for other algorithms.
525  // This algorithm is sequential greedy sampling of temporal then spatial indices.
526 
527  // TODO: for now, we assume one temporal basis vector for each spatial basis vector. This should be generalized.
528  CAROM_VERIFY(s_basis->numColumns() == num_f_basis_vectors_used
529  && t_basis->numColumns() == num_f_basis_vectors_used);
530 
531  // First, sample temporal indices.
532  CAROM_VERIFY(t_samples.size() == num_t_samples_req);
533 
534  SampleTemporalIndices(s_basis, t_basis, num_f_basis_vectors_used,
535  t_samples.data(),
536  myid, num_procs, num_t_samples_req, excludeFinalTime);
537 
538  // Second, sample spatial indices.
539  //Matrix s_basis_sampled(num_s_samples_req, num_f_basis_vectors_used, false);
540  CAROM_VERIFY(s_basis_sampled.numRows() == num_s_samples_req
541  && s_basis_sampled.numColumns() == num_f_basis_vectors_used);
542 
543  //std::vector<int> s_samples(num_s_samples_req);
544  //std::vector<int> s_samples_per_proc(num_procs);
545  SampleSpatialIndices(s_basis, t_basis, num_f_basis_vectors_used,
546  num_t_samples_req,
547  //t_samples.data(), s_samples.data(), s_samples_per_proc.data(),
548  t_samples.data(), f_sampled_row, f_sampled_rows_per_proc,
549  s_basis_sampled, myid, num_procs, num_s_samples_req);
550 }
551 
552 void GetSampledSpaceTimeBasis(std::vector<int> const& t_samples,
553  const Matrix* t_basis,
554  Matrix const& s_basis_sampled,
555  Matrix& f_basis_sampled_inv)
556 {
557  const int num_s_samples = s_basis_sampled.numRows();
558  const int num_t_samples = t_samples.size();
559 
560  // Set sampled space-time basis matrix and take its pseudo-inverse, in f_basis_sampled_inv.
561 
562  CAROM_VERIFY(f_basis_sampled_inv.numRows() == num_t_samples * num_s_samples);
563 
564  for (int si=0; si<num_s_samples; ++si)
565  {
566  for (int ti=0; ti<num_t_samples; ++ti)
567  {
568  const int row = ti + (si*num_t_samples);
569  const int t = t_samples[ti];
570  for (int j=0; j<f_basis_sampled_inv.numColumns(); ++j)
571  f_basis_sampled_inv.item(row, j) = s_basis_sampled.item(si,
572  j) * t_basis->item(t, j);
573  }
574  }
575 
576  // Compute the pseudo-inverse of f_basis_sampled_inv, storing its transpose.
577  f_basis_sampled_inv.transposePseudoinverse();
578 }
579 
580 }
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
Definition: Matrix.h:220
int numRows() const
Returns the number of rows of the Matrix on this processor.
Definition: Matrix.h:194
void SpaceTimeSampling(const Matrix *s_basis, const Matrix *t_basis, const int num_f_basis_vectors_used, std::vector< int > &t_samples, int *f_sampled_row, int *f_sampled_rows_per_proc, Matrix &s_basis_sampled, const int myid, const int num_procs, const int num_t_samples_req, const int num_s_samples_req, const bool excludeFinalTime)
Definition: STSampling.cpp:510