libROM  v1.0
Data-driven physical simulation library
GNAT.cpp
1 
11 // Description: Interface to the GNAT algorithm to determine the rows of the
12 // rhs to be sampled for the interpolation of the rhs.
13 
14 #include "linalg/Matrix.h"
15 #include "Utilities.h"
16 #include "mpi.h"
17 #include <cmath>
18 #include <vector>
19 #include <map>
20 #include <set>
21 #include <algorithm>
22 
23 #include "GNAT.h"
24 
25 namespace CAROM {
26 
27 void GNAT(const Matrix* f_basis,
28  const int num_f_basis_vectors_used,
29  std::vector<int>& f_sampled_row,
30  std::vector<int>& f_sampled_rows_per_proc,
31  Matrix& f_basis_sampled_inv,
32  const int myid,
33  const int num_procs,
34  const int num_samples_req,
35  std::vector<int> *init_samples)
36 {
37  CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
38 
39  // This algorithm determines the rows of f that should be sampled, the
40  // processor that owns each sampled row, and fills f_basis_sampled_inv with
41  // the inverse of the sampled rows of the basis of the RHS.
42 
43  // Create an MPI_Datatype for the RowInfo struct.
44  MPI_Datatype MaxRowType, oldtypes[2];
45  int blockcounts[2];
46  MPI_Aint offsets[2], extent, lower_bound;
47  MPI_Status stat;
48  offsets[0] = 0;
49  oldtypes[0] = MPI_DOUBLE;
50  blockcounts[0] = 1;
51 
52  /*
53  MPI_Type_extent is deprecated in MPI-2 and removed in
54  MPI-3. MPI_Type_extent has been replaced by MPI_Type_get_extent.
55  */
56 #if (MPI_VERSION == 1)
57  MPI_Type_extent(MPI_DOUBLE, &extent);
58 #else
59  MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
60 #endif
61 
62  offsets[1] = extent;
63  oldtypes[1] = MPI_INT;
64  blockcounts[1] = 2;
65 
66  /*
67  MPI_Type_struct is deprecated in MPI-2 and removed in
68  MPI-3. MPI_Type_struct has been replaced by MPI_Type_create_struct.
69  */
70 #if (MPI_VERSION == 1)
71  MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
72 #else
73  MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
74 #endif
75 
76  MPI_Type_commit(&MaxRowType);
77 
78  // Create an MPI_Op for the RowInfoMax function.
79  MPI_Op RowInfoOp;
80  MPI_Op_create((MPI_User_function*)RowInfoMax, true, &RowInfoOp);
81 
82  // Get the number of basis vectors and the size of each basis vector.
83  CAROM_VERIFY(0 < num_f_basis_vectors_used
84  && num_f_basis_vectors_used <= f_basis->numColumns());
85  const int num_basis_vectors =
86  std::min(num_f_basis_vectors_used, f_basis->numColumns());
87  const int num_samples = num_samples_req > 0 ? num_samples_req :
88  num_basis_vectors;
89  CAROM_VERIFY(num_basis_vectors <= num_samples
90  && num_samples <= f_basis->numDistributedRows());
91  CAROM_VERIFY(num_samples == f_sampled_row.size());
92  CAROM_VERIFY(num_samples == f_basis_sampled_inv.numRows()
93  && num_basis_vectors == f_basis_sampled_inv.numColumns());
94  CAROM_VERIFY(!f_basis_sampled_inv.distributed());
95  const int basis_size = f_basis->numRows();
96 
97  const int ns_mod_nr = num_samples % num_basis_vectors;
98  int ns = 0;
99 
100  // The small matrix inverted by the algorithm. We allocate the largest
101  // size needed and set the size at each step in the algorithm.
102  Matrix M(num_samples, std::max(num_basis_vectors-1, 1), false);
103 
104  // Scratch space used throughout the algorithm.
105  double* c = new double [num_basis_vectors];
106  double* sampled_row = new double [num_basis_vectors];
107 
108  std::vector<std::set<int> > proc_sampled_f_row(num_procs);
109  std::vector<std::map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
110  int num_f_basis_cols = f_basis_sampled_inv.numColumns();
111  Matrix tmp_fs(f_basis_sampled_inv.numRows(),
112  num_f_basis_cols,
113  f_basis_sampled_inv.distributed());
114 
115  // Gather information about initial samples given as input.
116  const int num_init_samples = init_samples ? init_samples->size() : 0;
117  int total_num_init_samples = 0;
118  MPI_Allreduce(&num_init_samples, &total_num_init_samples, 1,
119  MPI_INT, MPI_SUM, MPI_COMM_WORLD);
120 
121  int init_sample_offset = 0;
122  if (total_num_init_samples > 0)
123  {
124  CAROM_VERIFY(init_samples);
125  std::vector<int> all_num_init_samples(num_procs);
126  std::vector<int> all_init_samples(total_num_init_samples);
127 
128  MPI_Allgather(&num_init_samples, 1, MPI_INT, all_num_init_samples.data(),
129  1, MPI_INT, MPI_COMM_WORLD);
130 
131  for (int i = 0; i < myid; ++i)
132  {
133  init_sample_offset += all_num_init_samples[i];
134  }
135  }
136 
137  // Figure out the first sampled rows of the RHS.
138  RowInfo f_bv_max_local, f_bv_max_global;
139 
140  const int ns0 = 0 < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
141  num_samples / num_basis_vectors;
142 
143  for (int k=0; k<ns0; ++k)
144  {
145  f_bv_max_local.row_val = -1.0;
146  f_bv_max_local.proc = myid;
147 
148  if (k < total_num_init_samples)
149  {
150  // Take sample from init_samples on the corresponding process
151  if (k >= init_sample_offset && k - init_sample_offset < num_init_samples)
152  {
153  f_bv_max_local.row_val = 1.0; // arbitrary number, ensuring maximum
154  f_bv_max_local.row = (*init_samples)[k - init_sample_offset];
155  }
156  }
157  else
158  {
159  // Compute sample by the greedy algorithm
160  for (int i = 0; i < basis_size; ++i) {
161  // Check whether this row has already been sampled.
162  std::set<int>::const_iterator found = proc_sampled_f_row[myid].find(i);
163  if (found == proc_sampled_f_row[myid].end()) // not found
164  {
165  double f_bv_val = fabs(f_basis->item(i, 0));
166  if (f_bv_val > f_bv_max_local.row_val) {
167  f_bv_max_local.row_val = f_bv_val;
168  f_bv_max_local.row = i;
169  }
170  }
171  }
172  }
173 
174  MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
175  MaxRowType, RowInfoOp, MPI_COMM_WORLD);
176 
177  // Now get the first sampled row of the basis of the RHS.
178  if (f_bv_max_global.proc == myid) {
179  for (int j = 0; j < num_basis_vectors; ++j) {
180  sampled_row[j] = f_basis->item(f_bv_max_global.row, j);
181  }
182  }
183  MPI_Bcast(sampled_row, num_basis_vectors, MPI_DOUBLE,
184  f_bv_max_global.proc, MPI_COMM_WORLD);
185  // Now add the first sampled row of the basis of the RHS to tmp_fs.
186  for (int j = 0; j < num_basis_vectors; ++j) {
187  tmp_fs.item(k, j) = sampled_row[j];
188  }
189  proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
190  proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = k;
191  }
192 
193  ns += ns0;
194 
195  // Now repeat the process for the other sampled rows of the basis of the RHS.
196  for (int i = 1; i < num_basis_vectors; ++i) {
197  const int nsi = i < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
198  num_samples / num_basis_vectors;
199 
200  // If we currently know about S sampled rows of the basis of the RHS then
201  // M contains the first i columns of those S sampled rows.
202  M.setSize(ns, i);
203  for (int row = 0; row < ns; ++row) {
204  for (int col = 0; col < i; ++col) {
205  M.item(row, col) = tmp_fs.item(row, col);
206  }
207  }
208 
209  // Compute the pseudo-inverse of M, storing its transpose.
211 
212  // Now compute c, the inverse of M times the next column of the sampled
213  // rows of the basis of the RHS.
214  for (int minv_row = 0; minv_row < i; ++minv_row) {
215  double tmp = 0.0;
216  for (int minv_col = 0; minv_col < ns; ++minv_col) {
217  if (ns == i)
218  {
219  tmp += M.item(minv_row, minv_col)*tmp_fs.item(minv_col, i);
220  }
221  else
222  {
223  // Transposing M^+, which is stored as its transpose.
224  tmp += M.item(minv_col, minv_row)*tmp_fs.item(minv_col, i);
225  }
226  }
227  c[minv_row] = tmp;
228  }
229 
230  for (int k=0; k<nsi; ++k)
231  {
232  // Now figure out the next sampled row of the basis of f.
233  // Compute the first S basis vectors of the RHS times c and find the
234  // row of this product have the greatest absolute value. This is the
235  // next sampled row of the basis of f.
236  f_bv_max_local.row_val = -1.0;
237  f_bv_max_local.proc = myid;
238 
239  if (ns + k < total_num_init_samples)
240  {
241  // Take sample from init_samples on the corresponding process
242  if (ns + k >= init_sample_offset
243  && ns + k - init_sample_offset < num_init_samples)
244  {
245  f_bv_max_local.row_val = 1.0; // arbitrary number, ensuring maximum
246  f_bv_max_local.row = (*init_samples)[ns + k - init_sample_offset];
247  }
248  }
249  else
250  {
251  // Compute sample by the greedy algorithm
252  for (int F_row = 0; F_row < basis_size; ++F_row) {
253  // Check whether this row has already been sampled.
254  std::set<int>::const_iterator found = proc_sampled_f_row[myid].find(F_row);
255  if (found == proc_sampled_f_row[myid].end()) // not found
256  {
257  double tmp = 0.0;
258  for (int F_col = 0; F_col < i; ++F_col) {
259  tmp += f_basis->item(F_row, F_col)*c[F_col];
260  }
261  const double r_val = fabs(f_basis->item(F_row, i) - tmp);
262 
263  if (r_val > f_bv_max_local.row_val) {
264  f_bv_max_local.row_val = r_val;
265  f_bv_max_local.row = F_row;
266  }
267  }
268  }
269  }
270 
271  MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
272  MaxRowType, RowInfoOp, MPI_COMM_WORLD);
273 
274  // Now get the next sampled row of the basis of f.
275  if (f_bv_max_global.proc == myid) {
276  for (int j = 0; j < num_basis_vectors; ++j) {
277  sampled_row[j] = f_basis->item(f_bv_max_global.row, j);
278  }
279  }
280  MPI_Bcast(sampled_row, num_basis_vectors, MPI_DOUBLE,
281  f_bv_max_global.proc, MPI_COMM_WORLD);
282  // Now add the ith sampled row of the basis of the RHS to tmp_fs.
283  for (int j = 0; j < num_basis_vectors; ++j) {
284  tmp_fs.item(ns+k, j) = sampled_row[j];
285  }
286  proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
287  proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = ns+k;
288  }
289 
290  ns += nsi;
291  }
292 
293  CAROM_ASSERT(num_samples == ns);
294 
295  // Fill f_sampled_row, and f_sampled_rows_per_proc. Unscramble tmp_fs into
296  // f_basis_sampled_inv.
297  int idx = 0;
298  for (int i = 0; i < num_procs; ++i) {
299  std::set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
300  std::map<int, int>& this_proc_f_row_to_tmp_fs_row =
301  proc_f_row_to_tmp_fs_row[i];
302  f_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
303  for (std::set<int>::iterator j = this_proc_sampled_f_row.begin();
304  j != this_proc_sampled_f_row.end(); ++j) {
305  int this_f_row = *j;
306  f_sampled_row[idx] = this_f_row;
307  int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
308  for (int col = 0; col < num_f_basis_cols; ++col) {
309  f_basis_sampled_inv.item(idx, col) = tmp_fs.item(tmp_fs_row, col);
310  }
311  ++idx;
312  }
313  }
314 
315  CAROM_ASSERT(num_samples == idx);
316 
317  // Now invert f_basis_sampled_inv, storing its transpose.
318  f_basis_sampled_inv.transposePseudoinverse();
319 
320  // Free the MPI_Datatype and MPI_Op.
321  MPI_Type_free(&MaxRowType);
322  MPI_Op_free(&RowInfoOp);
323 
324  delete [] c;
325  delete [] sampled_row;
326 }
327 
328 }
void setSize(int num_rows, int num_cols)
Sets the number of rows and columns of the matrix and reallocates storage if needed....
Definition: Matrix.h:147
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
Definition: Matrix.cpp:882
bool distributed() const
Returns true if the Matrix is distributed.
Definition: Matrix.h:177
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
Definition: Matrix.h:220
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
Definition: Matrix.h:1007
int numRows() const
Returns the number of rows of the Matrix on this processor.
Definition: Matrix.h:194
void GNAT(const Matrix *f_basis, const int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req, std::vector< int > *init_samples)
Computes the GNAT algorithm on the given basis.
Definition: GNAT.cpp:27