libROM  v1.0
Data-driven physical simulation library
STSampling.cpp
1 
11 #include "linalg/Matrix.h"
12 #include "Utilities.h"
13 #include "mpi.h"
14 #include <cmath>
15 #include <map>
16 #include <set>
17 
18 #include "STSampling.h"
19 
20 using namespace std;
21 
22 namespace CAROM {
23 
24 // This function implements Algorithm 2 of Choi and Carlberg 2019.
25 // All spatial indices are used here.
26 // The spatial basis s_basis is distributed, of size
27 // (number of spatial DOFs) x (number of basis vectors)
28 // The temporal basis t_basis is not distributed, stored in its entirety on each
29 // process, of size (number of temporal DOFs) x (number of basis vectors)
30 // In general, there may be multiple temporal basis vectors associated with each
31 // spatial basis vector, but here we assume for now that there is only one, i.e.
32 // column j of s_basis corresponds to column j of t_basis.
33 void SampleTemporalIndices(const Matrix & s_basis,
34  const Matrix & t_basis,
35  const int num_f_basis_vectors_used,
36  int* t_samples,
37  const int myid,
38  const int num_procs,
39  const int num_samples_req,
40  const bool excludeFinalTime)
41 {
42  CAROM_VERIFY(!t_basis.distributed());
43 
44  // Get the number of basis vectors and the size of each basis vector.
45  CAROM_VERIFY(0 < num_f_basis_vectors_used
46  && num_f_basis_vectors_used <= s_basis.numColumns()
47  && num_f_basis_vectors_used <= t_basis.numColumns());
48  CAROM_VERIFY(num_samples_req > 0);
49  const int num_basis_vectors =
50  std::min(num_f_basis_vectors_used, s_basis.numColumns());
51  const int num_samples = num_samples_req;
52  const int numExcluded = excludeFinalTime ? 1 : 0;
53  CAROM_VERIFY(num_samples <= t_basis.numRows() - numExcluded);
54  const int s_size = s_basis.numRows();
55  const int t_size = t_basis.numRows();
56 
57  const int ns_mod_nr = num_samples % num_basis_vectors;
58  int ns = 0;
59 
60  // The small matrix inverted by the algorithm. We'll allocate the largest
61  // matrix we'll need and set its size at each step in the algorithm.
62  Matrix M(num_basis_vectors, num_basis_vectors,
63  false); // TODO: is this big enough to avoid reallocations?
64 
65  std::set<int> samples; // Temporal samples, identical on all processes
66 
67  std::vector<double> error(s_size * t_size);
68  std::vector<double> MZphi(num_basis_vectors);
69 
70  // Initialize error as the first space-time basis vector
71  for (int s=0; s<s_size; ++s)
72  {
73  for (int t=0; t<t_size; ++t)
74  error[t + (s*t_size)] = s_basis.item(s, 0) * t_basis.item(t, 0);
75  }
76 
77  for (int i=0; i<num_basis_vectors; ++i)
78  {
79  CAROM_VERIFY(samples.size() == ns);
80  if (i > 0)
81  {
82  // Set error vector to (I - [\phi_0, ..., \phi_{i-1}] (Z [\phi_0, ..., \phi_{i-1}])^+ Z) \phi_i
83  // where \phi_j is the j-th space-time basis vector (tensor product of columns j of s_basis and t_basis).
84 
85  // First, set M to Z [\phi_0, ..., \phi_{i-1}], where Z selects all spatial indices and the temporal indices in `samples`
86  M.setSize(ns * s_size, i);
87 
88  int ti = 0;
89  for (auto t : samples) // loop over ns values
90  {
91  for (int s = 0; s < s_size; ++s)
92  {
93  const int row = ti + (s*ns);
94  for (int col = 0; col < i; ++col)
95  {
96  M.item(row, col) = s_basis.item(s, col) * t_basis.item(t, col);
97  }
98  }
99 
100  ti++;
101  }
102 
103  // Compute the pseudo-inverse of M, storing its transpose.
104  M.transposePseudoinverse();
105 
106  // TODO: in parallel, it seems this matrix should be distributed, and the pseudo-inverse needs to be computed in parallel.
107  // In DEIM and GNAT, we gather the non-local rows that have been sampled, but here we use all spatial rows, so this will be
108  // more expensive. Fortunately, the matrix to be inverted in the pseudo-inverse computation will still be small.
109  // Hopefully M just needs to be declared as `distributed`.
110 
111  // Multiply Z\phi_i by the transpose of M, which is (Z [\phi_0, ..., \phi_{i-1}])^+.
112  // The result is a vector of length i, stored in MZphi.
113  for (int j = 0; j < i; ++j) MZphi[j] = 0.0;
114 
115  ti = 0;
116  for (auto t : samples) // loop over ns values
117  {
118  for (int s = 0; s < s_size; ++s)
119  {
120  const int row = ti + (s*ns);
121  const double phi_i_s_t = s_basis.item(s, i) * t_basis.item(t,
122  i); // \phi_i(s,t)
123 
124  for (int col = 0; col < i; ++col)
125  {
126  MZphi[col] += M.item(row, col) * phi_i_s_t;
127  }
128  }
129 
130  ti++;
131  }
132 
133  // Initialize error as space-time basis vector i
134  for (int s=0; s<s_size; ++s)
135  {
136  for (int t=0; t<t_size; ++t)
137  error[t + (s*t_size)] = s_basis.item(s, i) * t_basis.item(t, i);
138  }
139 
140  // Multiply MZphi by [\phi_0, ..., \phi_{i-1}], subtracting the result from error
141  for (int s=0; s<s_size; ++s)
142  {
143  for (int t=0; t<t_size; ++t)
144  {
145  for (int j=0; j<i; ++j)
146  error[t + (s*t_size)] -= MZphi[j] * s_basis.item(s, j) * t_basis.item(t, j);
147  }
148  }
149 
150  // Now error is set.
151  }
152 
153  const int nsi = i < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
154  num_samples / num_basis_vectors;
155 
156  for (int j=0; j<nsi; ++j)
157  {
158  // Find the temporal index not sampled yet that has the maximum l2 spatial error norm
159 
160  double maxNorm = -1.0;
161  int tmax = 0;
162  for (int t=0; t<t_size - numExcluded; ++t)
163  {
164  std::set<int>::const_iterator found = samples.find(t);
165  if (found != samples.end())
166  continue;
167 
168  double norm2 = 0.0;
169  for (int s=0; s<s_size; ++s)
170  {
171  norm2 += error[t + (s*t_size)] * error[t + (s*t_size)];
172  }
173 
174  // TODO: this Allreduce is expensive for every t. Can this be improved?
175  double globalNorm2 = 0.0;
176  MPI_Allreduce(&norm2, &globalNorm2, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
177  if (globalNorm2 > maxNorm)
178  {
179  maxNorm = globalNorm2;
180  tmax = t;
181  }
182  }
183 
184  samples.insert(tmax);
185  }
186 
187  ns += nsi;
188  }
189 
190  CAROM_VERIFY(num_samples == ns);
191 
192  int ti = 0;
193  for (auto t : samples)
194  {
195  t_samples[ti] = t;
196  ti++;
197  }
198 }
199 
200 // This function implements Algorithm 3 of Choi and Carlberg 2019.
201 // Temporal samples are input.
202 // Spatial basis s_basis is distributed, of size (number of spatial DOFs) x (number of basis vectors)
203 // Temporal basis t_basis is stored in its entirety on each process, of size (number of temporal DOFs) x (number of basis vectors)
204 // In general, there may be multiple temporal basis vectors associated with each spatial basis vector, but here we assume for now
205 // that there is only one, i.e. column j of s_basis corresponds to column j of t_basis.
206 void SampleSpatialIndices(const Matrix & s_basis,
207  const Matrix & t_basis,
208  const int num_f_basis_vectors_used,
209  const int num_t_samples,
210  int* t_samples,
211  int* s_sampled_row,
212  int* s_sampled_rows_per_proc,
213  Matrix& f_basis_sampled,
214  const int myid,
215  const int num_procs,
216  const int num_samples_req)
217 {
218  // This algorithm determines the rows of f that should be sampled, the
219  // processor that owns each sampled row, and fills f_basis_sampled with
220  // the sampled rows of the basis of the RHS.
221 
222  // Create an MPI_Datatype for the RowInfo struct.
223  MPI_Datatype MaxRowType, oldtypes[2];
224  int blockcounts[2];
225  MPI_Aint offsets[2], extent, lower_bound;
226  MPI_Status stat;
227  offsets[0] = 0;
228  oldtypes[0] = MPI_DOUBLE;
229  blockcounts[0] = 1;
230 
231  /*
232  MPI_Type_extent is deprecated in MPI-2 and removed in
233  MPI-3. MPI_Type_extent has been replaced by MPI_Type_get_extent.
234  */
235 
236 #if (MPI_VERSION == 1)
237  MPI_Type_extent(MPI_DOUBLE, &extent);
238 #else
239  MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
240 #endif
241 
242  offsets[1] = extent;
243  oldtypes[1] = MPI_INT;
244  blockcounts[1] = 2;
245 
246  /*
247  MPI_Type_struct is deprecated in MPI-2 and removed in
248  MPI-3. MPI_Type_struct has been replaced by MPI_Type_create_struct.
249  */
250 
251 #if (MPI_VERSION == 1)
252  MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
253 #else
254  MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
255 #endif
256 
257  MPI_Type_commit(&MaxRowType);
258 
259  // Create an MPI_Op for the RowInfoMax function.
260  MPI_Op RowInfoOp;
261  MPI_Op_create((MPI_User_function*)RowInfoMax, true, &RowInfoOp);
262 
263  //CAROM_VERIFY(!t_basis.distributed());
264 
265  // Get the number of basis vectors and the size of each basis vector.
266  CAROM_VERIFY(0 < num_f_basis_vectors_used
267  && num_f_basis_vectors_used <= s_basis.numColumns()
268  && num_f_basis_vectors_used <= t_basis.numColumns());
269  CAROM_VERIFY(num_samples_req >
270  0); // TODO: just replace num_samples with num_samples_req
271  const int num_basis_vectors =
272  std::min(num_f_basis_vectors_used, s_basis.numColumns());
273  //const int num_samples = num_samples_req > 0 ? num_samples_req : num_basis_vectors;
274  const int num_samples = num_samples_req;
275  CAROM_VERIFY(num_basis_vectors <= num_samples
276  && num_samples <= s_basis.numRows());
277  CAROM_VERIFY(num_samples == f_basis_sampled.numRows()
278  && num_basis_vectors == f_basis_sampled.numColumns());
279  CAROM_VERIFY(!f_basis_sampled.distributed());
280  const int s_size = s_basis.numRows();
281  const int t_size = t_basis.numRows();
282 
283  const int ns_mod_nr = num_samples % num_basis_vectors;
284  int ns = 0;
285 
286  // The small matrix inverted by the algorithm. We'll allocate the largest
287  // matrix we'll need and set its size at each step in the algorithm.
288  Matrix M(num_basis_vectors, num_basis_vectors,
289  false); // TODO: is this big enough to avoid reallocations?
290 
291  // TODO: change these variable names
292  std::vector<std::set<int> > proc_sampled_f_row(num_procs);
293  std::vector<std::map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
294  int num_f_basis_cols =
295  f_basis_sampled.numColumns(); // TODO: just use num_basis_vectors
296  Matrix tmp_fs(f_basis_sampled.numRows(),
297  num_f_basis_cols,
298  f_basis_sampled.distributed()); // TODO: should this be distributed?
299 
300  // TODO: change these variable names
301  RowInfo f_bv_max_local, f_bv_max_global;
302 
303  std::vector<double> error(s_size * t_size);
304  std::vector<double> MZphi(num_basis_vectors);
305  std::vector<double> sampled_row(num_basis_vectors);
306 
307  // Initialize error as the first space-time basis vector
308  for (int s=0; s<s_size; ++s)
309  {
310  for (int t=0; t<t_size; ++t)
311  error[t + (s*t_size)] = s_basis.item(s, 0) * t_basis.item(t, 0);
312  }
313 
314  for (int i=0; i<num_basis_vectors; ++i)
315  {
316  if (i > 0)
317  {
318  // Set error vector to (I - [\phi_0, ..., \phi_{i-1}] (Z [\phi_0, ..., \phi_{i-1}])^+ Z) \phi_i
319  // where \phi_j is the j-th space-time basis vector (tensor product of columns j of s_basis and t_basis).
320 
321  // First, set M to Z [\phi_0, ..., \phi_{i-1}], where Z selects spatial indices in `samples` and the temporal indices in `t_samples`
322 
323  /* // Distributed version: should this be used?
324  M.setSize(proc_sampled_f_row[myid].size() * num_t_samples, i);
325 
326  int si = 0;
327  for (auto s : proc_sampled_f_row[myid]) // loop over local spatial sample indices
328  {
329  for (int ti = 0; ti < num_t_samples; ++ti)
330  {
331  const int t = t_samples[ti];
332  const int row = ti + (si*num_t_samples);
333  for (int col = 0; col < i; ++col)
334  {
335  M.item(row, col) = s_basis.item(s, col) * t_basis.item(t, col);
336  }
337  }
338 
339  si++;
340  }
341  */
342 
343  // Set M to be the global sampled matrix and invert on each process.
344  M.setSize(ns * num_t_samples, i);
345  for (int k = 0; k < i; ++k)
346  for (int j = 0; j < ns; ++j)
347  {
348  for (int ti = 0; ti < num_t_samples; ++ti)
349  {
350  const int t = t_samples[ti];
351  const int row = ti + (j*num_t_samples);
352 
353  M.item(row, k) = tmp_fs.item(j, k) * t_basis.item(t, k);
354  }
355  }
356 
357  // Compute the pseudo-inverse of M, storing its transpose.
358  M.transposePseudoinverse();
359 
360  // Multiply Z\phi_i by the transpose of M, which is (Z [\phi_0, ..., \phi_{i-1}])^+.
361  // The result is a vector of length i, stored in MZphi.
362  for (int j = 0; j < i; ++j) MZphi[j] = 0.0;
363 
364  /*
365  // Distributed version: should this be used?
366 
367  // TODO: in parallel, it seems this matrix should be distributed, and the pseudo-inverse needs to be computed in parallel.
368  // In DEIM and GNAT, we gather the non-local spatial rows that have been sampled. This is so far just a serial implementation.
369 
370  si = 0;
371  for (auto s : proc_sampled_f_row[myid]) // loop over local spatial sample indices
372  {
373  for (int ti = 0; ti < num_t_samples; ++ti)
374  {
375  const int t = t_samples[ti];
376  const int row = ti + (si*num_t_samples);
377  const double phi_i_s_t = s_basis.item(s, i) * t_basis.item(t, i); // \phi_i(s,t)
378  for (int col = 0; col < i; ++col)
379  {
380  MZphi[col] += M.item(row, col) * phi_i_s_t;
381  }
382  }
383 
384  si++;
385  }
386 
387  CAROM_VERIFY(si * num_t_samples == M.numRows());
388  */
389 
390  for (int j = 0; j < ns; ++j)
391  {
392  for (int ti = 0; ti < num_t_samples; ++ti)
393  {
394  const int t = t_samples[ti];
395  const int row = ti + (j*num_t_samples);
396 
397  const double phi_i_s_t = s_basis.item(j, i) * t_basis.item(t,
398  i); // \phi_i(s,t)
399 
400  for (int k = 0; k < i; ++k)
401  MZphi[k] += M.item(row, k) * phi_i_s_t;
402  }
403  }
404 
405  // Initialize error as space-time basis vector i
406  for (int s=0; s<s_size; ++s)
407  {
408  for (int t=0; t<t_size; ++t)
409  error[t + (s*t_size)] = s_basis.item(s, i) * t_basis.item(t, i);
410  }
411 
412  // Multiply MZphi by [\phi_0, ..., \phi_{i-1}], subtracting the result from error
413  for (int s=0; s<s_size; ++s)
414  {
415  for (int t=0; t<t_size; ++t)
416  {
417  for (int j=0; j<i; ++j)
418  error[t + (s*t_size)] -= MZphi[j] * s_basis.item(s, j) * t_basis.item(t, j);
419  }
420  }
421 
422  // Now error is set.
423  }
424 
425  const int nsi = i < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
426  num_samples / num_basis_vectors;
427 
428  for (int j=0; j<nsi; ++j)
429  {
430  // Find the spatial index not sampled yet that has the maximum l2 temporal error norm.
431  // First find the process-local maximum.
432 
433  double maxNorm = -1.0;
434  int smax = 0;
435  for (int s=0; s<s_size; ++s)
436  {
437  std::set<int>::const_iterator found = proc_sampled_f_row[myid].find(s);
438  if (found != proc_sampled_f_row[myid].end())
439  continue;
440 
441  double norm2 = 0.0;
442  for (int t=0; t<t_size; ++t)
443  {
444  norm2 += error[t + (s*t_size)] * error[t + (s*t_size)];
445  }
446 
447  if (norm2 > maxNorm)
448  {
449  maxNorm = norm2;
450  smax = s;
451  }
452  }
453 
454  // The local maximum with respect spatial indices is (maxNorm, smax).
455  // Now find the global maximum.
456  //RowInfo f_bv_max_local, f_bv_max_global;
457  f_bv_max_local.row_val = maxNorm;
458  f_bv_max_local.row = smax;
459  f_bv_max_local.proc = myid;
460 
461  MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
462  MaxRowType, RowInfoOp, MPI_COMM_WORLD);
463 
464  proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
465  proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = ns + j;
466 
467  // Now get the sampled row of the spatial basis.
468  if (f_bv_max_global.proc == myid) {
469  for (int k = 0; k < num_basis_vectors; ++k) {
470  sampled_row[k] = s_basis.item(f_bv_max_global.row, k);
471  }
472  }
473  MPI_Bcast(sampled_row.data(), num_basis_vectors, MPI_DOUBLE,
474  f_bv_max_global.proc, MPI_COMM_WORLD);
475  // Now add the sampled row of the basis to tmp_fs.
476  for (int k = 0; k < num_basis_vectors; ++k) {
477  tmp_fs.item(ns+j, k) = sampled_row[k];
478  }
479  }
480 
481  ns += nsi;
482  }
483 
484  CAROM_VERIFY(num_samples == ns);
485 
486  // Fill f_sampled_row, and f_sampled_rows_per_proc. Unscramble tmp_fs into
487  // f_basis_sampled.
488  int idx = 0;
489  for (int i = 0; i < num_procs; ++i) {
490  std::set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
491  std::map<int, int>& this_proc_f_row_to_tmp_fs_row =
492  proc_f_row_to_tmp_fs_row[i];
493  s_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
494  for (std::set<int>::iterator j = this_proc_sampled_f_row.begin();
495  j != this_proc_sampled_f_row.end(); ++j) {
496  int this_f_row = *j;
497  s_sampled_row[idx] = this_f_row;
498  int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
499  for (int col = 0; col < num_f_basis_cols; ++col) {
500  f_basis_sampled.item(idx, col) = tmp_fs.item(tmp_fs_row, col);
501  }
502  ++idx;
503  }
504  }
505 
506  CAROM_VERIFY(num_samples == idx);
507 
508  // Free the MPI_Datatype and MPI_Op.
509  MPI_Type_free(&MaxRowType);
510  MPI_Op_free(&RowInfoOp);
511 }
512 
513 void SpaceTimeSampling(const Matrix & s_basis,
514  const Matrix & t_basis,
515  const int num_f_basis_vectors_used,
516  std::vector<int>& t_samples,
517  int* f_sampled_row,
518  int* f_sampled_rows_per_proc,
519  Matrix& s_basis_sampled,
520  const int myid,
521  const int num_procs,
522  const int num_t_samples_req,
523  const int num_s_samples_req,
524  const bool excludeFinalTime)
525 {
526  // There are multiple possible algorithms for space-time sampling. For now, we just implement
527  // one algorithm, but in the future there could be options for other algorithms.
528  // This algorithm is sequential greedy sampling of temporal then spatial indices.
529 
530  // TODO: for now, we assume one temporal basis vector for each spatial basis vector. This should be generalized.
531  CAROM_VERIFY(s_basis.numColumns() == num_f_basis_vectors_used
532  && t_basis.numColumns() == num_f_basis_vectors_used);
533 
534  // First, sample temporal indices.
535  CAROM_VERIFY(t_samples.size() == num_t_samples_req);
536 
537  SampleTemporalIndices(s_basis, t_basis, num_f_basis_vectors_used,
538  t_samples.data(),
539  myid, num_procs, num_t_samples_req, excludeFinalTime);
540 
541  // Second, sample spatial indices.
542  //Matrix s_basis_sampled(num_s_samples_req, num_f_basis_vectors_used, false);
543  CAROM_VERIFY(s_basis_sampled.numRows() == num_s_samples_req
544  && s_basis_sampled.numColumns() == num_f_basis_vectors_used);
545 
546  //std::vector<int> s_samples(num_s_samples_req);
547  //std::vector<int> s_samples_per_proc(num_procs);
548  SampleSpatialIndices(s_basis, t_basis, num_f_basis_vectors_used,
549  num_t_samples_req,
550  //t_samples.data(), s_samples.data(), s_samples_per_proc.data(),
551  t_samples.data(), f_sampled_row, f_sampled_rows_per_proc,
552  s_basis_sampled, myid, num_procs, num_s_samples_req);
553 }
554 
555 void GetSampledSpaceTimeBasis(std::vector<int> const& t_samples,
556  const Matrix& t_basis,
557  Matrix const& s_basis_sampled,
558  Matrix& f_basis_sampled_inv)
559 {
560  const int num_s_samples = s_basis_sampled.numRows();
561  const int num_t_samples = t_samples.size();
562 
563  // Set sampled space-time basis matrix and take its pseudo-inverse, in f_basis_sampled_inv.
564 
565  CAROM_VERIFY(f_basis_sampled_inv.numRows() == num_t_samples * num_s_samples);
566 
567  for (int si=0; si<num_s_samples; ++si)
568  {
569  for (int ti=0; ti<num_t_samples; ++ti)
570  {
571  const int row = ti + (si*num_t_samples);
572  const int t = t_samples[ti];
573  for (int j=0; j<f_basis_sampled_inv.numColumns(); ++j)
574  f_basis_sampled_inv.item(row, j) = s_basis_sampled.item(si,
575  j) * t_basis.item(t, j);
576  }
577  }
578 
579  // Compute the pseudo-inverse of f_basis_sampled_inv, storing its transpose.
580  f_basis_sampled_inv.transposePseudoinverse();
581 }
582 
583 }
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
Definition: Matrix.h:226
int numRows() const
Returns the number of rows of the Matrix on this processor.
Definition: Matrix.h:200
void SpaceTimeSampling(const Matrix &s_basis, const Matrix &t_basis, const int num_f_basis_vectors_used, std::vector< int > &t_samples, int *f_sampled_row, int *f_sampled_rows_per_proc, Matrix &s_basis_sampled, const int myid, const int num_procs, const int num_t_samples_req, const int num_s_samples_req, const bool excludeFinalTime)
Definition: STSampling.cpp:513