14 #include "linalg/Matrix.h"
15 #include "Utilities.h"
28 const int num_f_basis_vectors_used,
29 std::vector<int>& f_sampled_row,
30 std::vector<int>& f_sampled_rows_per_proc,
31 Matrix& f_basis_sampled_inv,
34 const int num_samples_req,
35 std::vector<int> *init_samples)
37 CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
44 MPI_Datatype MaxRowType, oldtypes[2];
46 MPI_Aint offsets[2], extent, lower_bound;
49 oldtypes[0] = MPI_DOUBLE;
56 #if (MPI_VERSION == 1)
57 MPI_Type_extent(MPI_DOUBLE, &extent);
59 MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
63 oldtypes[1] = MPI_INT;
70 #if (MPI_VERSION == 1)
71 MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
73 MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
76 MPI_Type_commit(&MaxRowType);
80 MPI_Op_create((MPI_User_function*)RowInfoMax,
true, &RowInfoOp);
83 CAROM_VERIFY(0 < num_f_basis_vectors_used
84 && num_f_basis_vectors_used <= f_basis->numColumns());
85 const int num_basis_vectors =
86 std::min(num_f_basis_vectors_used, f_basis->
numColumns());
87 const int num_samples = num_samples_req > 0 ? num_samples_req :
89 CAROM_VERIFY(num_basis_vectors <= num_samples
90 && num_samples <= f_basis->numDistributedRows());
91 CAROM_VERIFY(num_samples == f_sampled_row.size());
92 CAROM_VERIFY(num_samples == f_basis_sampled_inv.
numRows()
93 && num_basis_vectors == f_basis_sampled_inv.
numColumns());
95 const int basis_size = f_basis->
numRows();
97 const int ns_mod_nr = num_samples % num_basis_vectors;
102 Matrix M(num_samples, std::max(num_basis_vectors-1, 1),
false);
105 double* c =
new double [num_basis_vectors];
106 double* sampled_row =
new double [num_basis_vectors];
108 std::vector<std::set<int> > proc_sampled_f_row(num_procs);
109 std::vector<std::map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
110 int num_f_basis_cols = f_basis_sampled_inv.
numColumns();
116 const int num_init_samples = init_samples ? init_samples->size() : 0;
117 int total_num_init_samples = 0;
118 MPI_Allreduce(&num_init_samples, &total_num_init_samples, 1,
119 MPI_INT, MPI_SUM, MPI_COMM_WORLD);
121 int init_sample_offset = 0;
122 if (total_num_init_samples > 0)
124 CAROM_VERIFY(init_samples);
125 std::vector<int> all_num_init_samples(num_procs);
126 std::vector<int> all_init_samples(total_num_init_samples);
128 MPI_Allgather(&num_init_samples, 1, MPI_INT, all_num_init_samples.data(),
129 1, MPI_INT, MPI_COMM_WORLD);
131 for (
int i = 0; i < myid; ++i)
133 init_sample_offset += all_num_init_samples[i];
138 RowInfo f_bv_max_local, f_bv_max_global;
140 const int ns0 = 0 < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
141 num_samples / num_basis_vectors;
143 for (
int k=0; k<ns0; ++k)
145 f_bv_max_local.row_val = -1.0;
146 f_bv_max_local.proc = myid;
148 if (k < total_num_init_samples)
151 if (k >= init_sample_offset && k - init_sample_offset < num_init_samples)
153 f_bv_max_local.row_val = 1.0;
154 f_bv_max_local.row = (*init_samples)[k - init_sample_offset];
160 for (
int i = 0; i < basis_size; ++i) {
162 std::set<int>::const_iterator found = proc_sampled_f_row[myid].find(i);
163 if (found == proc_sampled_f_row[myid].end())
165 double f_bv_val = fabs(f_basis->
item(i, 0));
166 if (f_bv_val > f_bv_max_local.row_val) {
167 f_bv_max_local.row_val = f_bv_val;
168 f_bv_max_local.row = i;
174 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
175 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
178 if (f_bv_max_global.proc == myid) {
179 for (
int j = 0; j < num_basis_vectors; ++j) {
180 sampled_row[j] = f_basis->
item(f_bv_max_global.row, j);
183 MPI_Bcast(sampled_row, num_basis_vectors, MPI_DOUBLE,
184 f_bv_max_global.proc, MPI_COMM_WORLD);
186 for (
int j = 0; j < num_basis_vectors; ++j) {
187 tmp_fs.
item(k, j) = sampled_row[j];
189 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
190 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = k;
196 for (
int i = 1; i < num_basis_vectors; ++i) {
197 const int nsi = i < ns_mod_nr ? (num_samples / num_basis_vectors) + 1 :
198 num_samples / num_basis_vectors;
203 for (
int row = 0; row < ns; ++row) {
204 for (
int col = 0; col < i; ++col) {
205 M.
item(row, col) = tmp_fs.
item(row, col);
214 for (
int minv_row = 0; minv_row < i; ++minv_row) {
216 for (
int minv_col = 0; minv_col < ns; ++minv_col) {
219 tmp += M.
item(minv_row, minv_col)*tmp_fs.
item(minv_col, i);
224 tmp += M.
item(minv_col, minv_row)*tmp_fs.
item(minv_col, i);
230 for (
int k=0; k<nsi; ++k)
236 f_bv_max_local.row_val = -1.0;
237 f_bv_max_local.proc = myid;
239 if (ns + k < total_num_init_samples)
242 if (ns + k >= init_sample_offset
243 && ns + k - init_sample_offset < num_init_samples)
245 f_bv_max_local.row_val = 1.0;
246 f_bv_max_local.row = (*init_samples)[ns + k - init_sample_offset];
252 for (
int F_row = 0; F_row < basis_size; ++F_row) {
254 std::set<int>::const_iterator found = proc_sampled_f_row[myid].find(F_row);
255 if (found == proc_sampled_f_row[myid].end())
258 for (
int F_col = 0; F_col < i; ++F_col) {
259 tmp += f_basis->
item(F_row, F_col)*c[F_col];
261 const double r_val = fabs(f_basis->
item(F_row, i) - tmp);
263 if (r_val > f_bv_max_local.row_val) {
264 f_bv_max_local.row_val = r_val;
265 f_bv_max_local.row = F_row;
271 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
272 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
275 if (f_bv_max_global.proc == myid) {
276 for (
int j = 0; j < num_basis_vectors; ++j) {
277 sampled_row[j] = f_basis->
item(f_bv_max_global.row, j);
280 MPI_Bcast(sampled_row, num_basis_vectors, MPI_DOUBLE,
281 f_bv_max_global.proc, MPI_COMM_WORLD);
283 for (
int j = 0; j < num_basis_vectors; ++j) {
284 tmp_fs.
item(ns+k, j) = sampled_row[j];
286 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
287 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = ns+k;
293 CAROM_ASSERT(num_samples == ns);
298 for (
int i = 0; i < num_procs; ++i) {
299 std::set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
300 std::map<int, int>& this_proc_f_row_to_tmp_fs_row =
301 proc_f_row_to_tmp_fs_row[i];
302 f_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
303 for (std::set<int>::iterator j = this_proc_sampled_f_row.begin();
304 j != this_proc_sampled_f_row.end(); ++j) {
306 f_sampled_row[idx] = this_f_row;
307 int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
308 for (
int col = 0; col < num_f_basis_cols; ++col) {
309 f_basis_sampled_inv.
item(idx, col) = tmp_fs.
item(tmp_fs_row, col);
315 CAROM_ASSERT(num_samples == idx);
321 MPI_Type_free(&MaxRowType);
322 MPI_Op_free(&RowInfoOp);
325 delete [] sampled_row;
void setSize(int num_rows, int num_cols)
Sets the number of rows and columns of the matrix and reallocates storage if needed....
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
int numRows() const
Returns the number of rows of the Matrix on this processor.
void GNAT(const Matrix *f_basis, const int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req, std::vector< int > *init_samples)
Computes the GNAT algorithm on the given basis.