16 #include "utils/HDFDatabase.h"
17 #include "utils/mpi_utils.h"
24 #ifdef CAROM_HAS_ELEMENTAL
28 #include "scalapack_wrapper.h"
31 #define dsyev CAROM_FC_GLOBAL(dsyev, DSYEV)
32 #define dgeev CAROM_FC_GLOBAL(dgeev, DGEEV)
33 #define dgetrf CAROM_FC_GLOBAL(dgetrf, DGETRF)
34 #define dgetri CAROM_FC_GLOBAL(dgetri, DGETRI)
35 #define dgeqp3 CAROM_FC_GLOBAL(dgeqp3, DGEQP3)
36 #define dgesdd CAROM_FC_GLOBAL(dgesdd, DGESDD)
// Fortran LAPACK routine prototypes.  Each name below is remapped to the
// platform-specific Fortran symbol by the CAROM_FC_GLOBAL #defines above;
// all arguments are passed by pointer per the Fortran calling convention.

// DSYEV: eigenvalues/eigenvectors of a real symmetric matrix.
// (JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO)
void dsyev(
    char*, char*, int*, double*, int*,
    double*, double*, int*, int*);

// DGEEV: eigenvalues/eigenvectors of a general (nonsymmetric) matrix.
// (JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, LDVR, WORK, LWORK, INFO)
void dgeev(
    char*, char*, int*, double*, int*,
    double*, double*, double*, int*, double*, int*,
    double*, int*, int*);

// DGETRF: LU factorization with partial pivoting.
// (M, N, A, LDA, IPIV, INFO)
void dgetrf(
    int*, int*, double*, int*, int*, int*);

// DGETRI: matrix inverse from the LU factorization computed by DGETRF.
// (N, A, LDA, IPIV, WORK, LWORK, INFO)
void dgetri(
    int*, double*, int*, int*, double*, int*, int*);

// DGEQP3: QR factorization with column pivoting (used for QRCP pivot
// selection).  (M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO)
void dgeqp3(
    int*, int*, double*, int*, int*, double*, double*, int*, int*);

// DGESDD: singular value decomposition, divide-and-conquer driver.
// (JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
void dgesdd(
    char*, int*, int*, double*, int*,
    double*, double*, int*, double*, int*,
    double*, int*, int*, int*);
77 d_distributed(distributed),
80 CAROM_VERIFY(num_rows > 0);
81 CAROM_VERIFY(num_cols > 0);
83 MPI_Initialized(&mpi_init);
85 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
86 MPI_Comm_rank(MPI_COMM_WORLD, &d_rank);
94 std::default_random_engine generator;
95 std::normal_distribution<double> normal_distribution(0.0, 1.0);
96 for (
int i = 0; i < num_rows; i++) {
97 for (
int j = 0; j < num_cols; j++) {
98 item(i,j) = normal_distribution(generator);
112 d_distributed(distributed),
113 d_owns_data(copy_data)
115 CAROM_VERIFY(mat != 0);
116 CAROM_VERIFY(num_rows > 0);
117 CAROM_VERIFY(num_cols > 0);
119 MPI_Initialized(&mpi_init);
121 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
122 MPI_Comm_rank(MPI_COMM_WORLD, &d_rank);
130 memcpy(d_mat, mat, d_alloc_size*
sizeof(
double));
134 d_alloc_size = num_rows*num_cols;
135 d_num_cols = num_cols;
136 d_num_rows = num_rows;
138 calculateNumDistributedRows();
147 d_distributed(other.d_distributed),
151 MPI_Initialized(&mpi_init);
153 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
154 MPI_Comm_rank(MPI_COMM_WORLD, &d_rank);
160 setSize(other.d_num_rows, other.d_num_cols);
161 memcpy(d_mat, other.d_mat, d_alloc_size*
sizeof(
double));
166 if (d_owns_data && d_mat) {
175 d_distributed = rhs.d_distributed;
176 d_num_procs = rhs.d_num_procs;
177 setSize(rhs.d_num_rows, rhs.d_num_cols);
178 memcpy(d_mat, rhs.d_mat, d_num_rows*d_num_cols*
sizeof(
double));
186 CAROM_VERIFY(rhs.d_num_rows == d_num_rows);
187 CAROM_VERIFY(rhs.d_num_cols == d_num_cols);
188 for(
int i=0; i<d_num_rows*d_num_cols; ++i) d_mat[i] += rhs.d_mat[i];
196 CAROM_VERIFY(rhs.d_num_rows == d_num_rows);
197 CAROM_VERIFY(rhs.d_num_cols == d_num_cols);
198 for(
int i=0; i<d_num_rows*d_num_cols; ++i) d_mat[i] -= rhs.d_mat[i];
218 const MPI_Comm comm = MPI_COMM_WORLD;
223 const int first_rank_with_fewer = num_total_rows % d_num_procs;
225 CAROM_VERIFY(MPI_Comm_rank(comm, &my_rank) == MPI_SUCCESS);
227 const int min_rows_per_rank = num_total_rows / d_num_procs;
228 const bool has_extra_row = my_rank < first_rank_with_fewer;
229 const int max_rows_on_rank = min_rows_per_rank + has_extra_row;
230 const bool has_enough_rows = (d_num_rows >= min_rows_per_rank);
231 const bool has_too_many_rows = (d_num_rows > max_rows_on_rank);
233 int result = (has_enough_rows && !has_too_many_rows);
234 const int reduce_count = 1;
235 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
240 comm) == MPI_SUCCESS);
249 for(
int i=0; i<d_num_rows*d_num_cols; ++i) {
258 CAROM_VERIFY(n > 0 && n <= d_num_cols);
260 Matrix* first_n_columns = NULL;
262 return first_n_columns;
271 CAROM_VERIFY(n > 0 && n <= d_num_cols);
277 result =
new Matrix(d_num_rows, n, d_distributed);
281 result->
setSize(d_num_rows, n);
284 for (
int i = 0; i < d_num_rows; i++)
286 for (
int j = 0; j < n; j++)
299 CAROM_VERIFY(n > 0 && n <= d_num_cols);
304 for (
int i = 0; i < d_num_rows; i++)
306 for (
int j = 0; j < n; j++)
325 result =
new Matrix(d_num_rows, other.d_num_cols, d_distributed);
328 result->
setSize(d_num_rows, other.d_num_cols);
332 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
333 for (
int other_col = 0; other_col < other.d_num_cols; ++other_col) {
334 double result_val = 0.0;
335 for (
int entry = 0; entry < d_num_cols; ++entry) {
336 result_val +=
item(this_row, entry)*other.
item(entry, other_col);
338 result->
item(this_row, other_col) = result_val;
353 result.
setSize(d_num_rows, other.d_num_cols);
356 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
357 for (
int other_col = 0; other_col < other.d_num_cols; ++other_col) {
358 double result_val = 0.0;
359 for (
int entry = 0; entry < d_num_cols; ++entry) {
360 result_val +=
item(this_row, entry)*other.
item(entry, other_col);
362 result.
item(this_row, other_col) = result_val;
379 result =
new Vector(d_num_rows, d_distributed);
386 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
387 double result_val = 0.0;
388 for (
int entry = 0; entry < d_num_cols; ++entry) {
389 result_val +=
item(this_row, entry)*other.
item(entry);
391 result->
item(this_row) = result_val;
408 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
409 double result_val = 0.0;
410 for (
int entry = 0; entry < d_num_cols; ++entry) {
411 result_val +=
item(this_row, entry)*other.
item(entry);
413 result.
item(this_row) = result_val;
430 for (
int entry = 0; entry < d_num_cols; ++entry) {
431 result.
item(entry) =
item(this_row, entry)*other.
item(entry);
445 for (
int entry = 0; entry < d_num_cols; ++entry) {
446 other.
item(entry) *=
item(this_row, entry);
463 result =
new Matrix(d_num_rows, d_num_cols, d_distributed);
466 result->
setSize(d_num_rows, d_num_cols);
470 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
471 for (
int other_col = 0; other_col < d_num_cols; ++other_col) {
472 result->
item(this_row, other_col) =
item(this_row,
473 other_col) * other.
item(this_row, other_col);
489 result.
setSize(d_num_rows, other.d_num_cols);
492 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
493 for (
int other_col = 0; other_col < d_num_cols; ++other_col) {
494 result.
item(this_row, other_col) =
item(this_row,
495 other_col) * other.
item(this_row, other_col);
507 result =
new Matrix(d_num_rows, d_num_cols, d_distributed);
510 result->
setSize(d_num_rows, d_num_cols);
514 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
515 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
516 result->
item(this_row, this_col) =
item(this_row, this_col) *
item(this_row,
527 result.
setSize(d_num_rows, d_num_cols);
530 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
531 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
532 result.
item(this_row, this_col) =
item(this_row, this_col) *
item(this_row,
549 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
551 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
552 tmp +=
item(this_row, this_col)*b.
item(this_col);
554 a.
item(this_row) += tmp*c;
563 CAROM_VERIFY(result == 0 || !result->
distributed());
570 result =
new Matrix(d_num_cols, other.d_num_cols,
false);
573 result->
setSize(d_num_cols, other.d_num_cols);
577 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
578 for (
int other_col = 0; other_col < other.d_num_cols; ++other_col) {
579 double result_val = 0.0;
580 for (
int entry = 0; entry < d_num_rows; ++entry) {
581 result_val +=
item(entry, this_col)*other.
item(entry, other_col);
583 result->
item(this_col, other_col) = result_val;
586 if (d_distributed && d_num_procs > 1) {
587 int new_mat_size = d_num_cols*other.d_num_cols;
588 MPI_Allreduce(MPI_IN_PLACE,
607 result.
setSize(d_num_cols, other.d_num_cols);
610 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
611 for (
int other_col = 0; other_col < other.d_num_cols; ++other_col) {
612 double result_val = 0.0;
613 for (
int entry = 0; entry < d_num_rows; ++entry) {
614 result_val +=
item(entry, this_col)*other.
item(entry, other_col);
616 result.
item(this_col, other_col) = result_val;
619 if (d_distributed && d_num_procs > 1) {
620 int new_mat_size = d_num_cols*other.d_num_cols;
621 MPI_Allreduce(MPI_IN_PLACE,
635 CAROM_VERIFY(result == 0 || !result->
distributed());
642 result =
new Vector(d_num_cols,
false);
649 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
650 double result_val = 0.0;
651 for (
int entry = 0; entry < d_num_rows; ++entry) {
652 result_val +=
item(entry, this_col)*other.
item(entry);
654 result->
item(this_col) = result_val;
656 if (d_distributed && d_num_procs > 1) {
657 MPI_Allreduce(MPI_IN_PLACE,
680 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
681 double result_val = 0.0;
682 for (
int entry = 0; entry < d_num_rows; ++entry) {
683 result_val +=
item(entry, this_col)*other.
item(entry);
685 result.
item(this_col) = result_val;
687 if (d_distributed && d_num_procs > 1) {
688 MPI_Allreduce(MPI_IN_PLACE,
703 result =
new Vector(d_num_rows,
true);
706 result =
new Vector(d_num_rows,
false);
717 for (
int i = 0; i < d_num_rows; i++) {
726 CAROM_VERIFY(result == 0 ||
736 result =
new Matrix(d_num_rows, d_num_cols,
false);
739 result->
setSize(d_num_rows, d_num_cols);
745 int mtx_size = d_num_rows;
746 int lwork = mtx_size*mtx_size;
747 int* ipiv =
new int [mtx_size];
748 double* work =
new double [lwork];
751 for (
int row = 0; row < mtx_size; ++row) {
752 for (
int col = 0; col < mtx_size; ++col) {
753 result->
item(col, row) =
item(row, col);
757 dgetrf(&mtx_size, &mtx_size, result->d_mat, &mtx_size, ipiv, &info);
758 CAROM_VERIFY(info == 0);
760 dgetri(&mtx_size, result->d_mat, &mtx_size, ipiv, work, &lwork, &info);
761 CAROM_VERIFY(info == 0);
765 for (
int row = 0; row < mtx_size; ++row) {
766 for (
int col = row+1; col < mtx_size; ++col) {
767 double tmp = result->
item(row, col);
768 result->
item(row, col) = result->
item(col, row);
769 result->
item(col, row) = tmp;
787 result.
setSize(d_num_rows, d_num_cols);
792 int mtx_size = d_num_rows;
793 int lwork = mtx_size*mtx_size;
794 int* ipiv =
new int [mtx_size];
795 double* work =
new double [lwork];
798 for (
int row = 0; row < mtx_size; ++row) {
799 for (
int col = 0; col < mtx_size; ++col) {
800 result.
item(col, row) =
item(row, col);
804 dgetrf(&mtx_size, &mtx_size, result.d_mat, &mtx_size, ipiv, &info);
805 CAROM_VERIFY(info == 0);
807 dgetri(&mtx_size, result.d_mat, &mtx_size, ipiv, work, &lwork, &info);
808 CAROM_VERIFY(info == 0);
812 for (
int row = 0; row < mtx_size; ++row) {
813 for (
int col = row+1; col < mtx_size; ++col) {
814 double tmp = result.
item(row, col);
815 result.
item(row, col) = result.
item(col, row);
816 result.
item(col, row) = tmp;
828 for (
int i=0; i<n-1; ++i)
830 for (
int j=i+1; j<n; ++j)
832 const double t = d_mat[i*n+j];
833 d_mat[i*n+j] = d_mat[j*n+i];
848 int mtx_size = d_num_rows;
849 int lwork = mtx_size*mtx_size;
850 int* ipiv =
new int [mtx_size];
851 double* work =
new double [lwork];
854 for (
int row = 0; row < mtx_size; ++row) {
855 for (
int col = row+1; col < mtx_size; ++col) {
856 double tmp =
item(row, col);
858 item(col, row) = tmp;
862 dgetrf(&mtx_size, &mtx_size, d_mat, &mtx_size, ipiv, &info);
863 CAROM_VERIFY(info == 0);
865 dgetri(&mtx_size, d_mat, &mtx_size, ipiv, work, &lwork, &info);
866 CAROM_VERIFY(info == 0);
870 for (
int row = 0; row < mtx_size; ++row) {
871 for (
int col = row+1; col < mtx_size; ++col) {
872 double tmp =
item(row, col);
874 item(col, row) = tmp;
901 for (
int i=0; i<
numRows(); ++i)
921 const bool success = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
922 CAROM_ASSERT(success);
924 std::string filename_str = prefix + std::to_string(my_rank);
925 const char * filename = filename_str.c_str();
926 FILE * pFile = fopen(filename,
"w");
927 for (
int row = 0; row < d_num_rows; ++row) {
928 for (
int col = 0; col < d_num_cols; ++col) {
929 fprintf(pFile,
" %25.20e\t",
item(row,col));
931 fprintf(pFile,
"\n");
939 CAROM_VERIFY(!base_file_name.empty());
942 MPI_Initialized(&mpi_init);
945 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
952 sprintf(tmp,
".%06d", rank);
953 std::string full_file_name = base_file_name + tmp;
955 database.
create(full_file_name);
957 sprintf(tmp,
"distributed");
959 sprintf(tmp,
"num_rows");
961 sprintf(tmp,
"num_cols");
963 sprintf(tmp,
"data");
971 CAROM_VERIFY(!base_file_name.empty());
974 MPI_Initialized(&mpi_init);
977 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
978 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
986 sprintf(tmp,
".%06d", rank);
987 std::string full_file_name = base_file_name + tmp;
989 database.
open(full_file_name,
"r");
991 sprintf(tmp,
"distributed");
996 sprintf(tmp,
"num_rows");
999 sprintf(tmp,
"num_cols");
1002 sprintf(tmp,
"data");
1011 CAROM_VERIFY(!base_file_name.empty());
1014 MPI_Initialized(&mpi_init);
1016 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
1023 sprintf(tmp,
".%06d", rank);
1024 std::string full_file_name = base_file_name + tmp;
1026 database.
open(full_file_name,
"r");
1028 sprintf(tmp,
"distributed");
1033 sprintf(tmp,
"num_rows");
1036 sprintf(tmp,
"num_cols");
1039 sprintf(tmp,
"data");
1049 CAROM_VERIFY(d_owns_data);
1051 std::vector<int> row_offsets;
1054 CAROM_VERIFY(num_total_rows == d_num_rows);
1055 int local_offset = row_offsets[d_rank] * d_num_cols;
1056 const int new_size = local_num_rows * d_num_cols;
1058 double *d_new_mat =
new double [new_size];
1060 memcpy(d_new_mat, &d_mat[local_offset], 8 * new_size);
1064 d_alloc_size = new_size;
1066 d_num_distributed_rows = d_num_rows;
1067 d_num_rows = local_num_rows;
1069 d_distributed =
true;
1076 CAROM_VERIFY(d_owns_data);
1078 std::vector<int> row_offsets;
1081 CAROM_VERIFY(num_total_rows == d_num_distributed_rows);
1082 const int new_size = d_num_distributed_rows * d_num_cols;
1084 int *data_offsets =
new int[row_offsets.size() - 1];
1085 int *data_cnts =
new int[row_offsets.size() - 1];
1086 for (
int k = 0; k < row_offsets.size() - 1; k++)
1088 data_offsets[k] = row_offsets[k] * d_num_cols;
1089 data_cnts[k] = (row_offsets[k+1] - row_offsets[k]) * d_num_cols;
1092 double *d_new_mat =
new double [new_size] {0.0};
1093 CAROM_VERIFY(MPI_Allgatherv(d_mat, d_alloc_size, MPI_DOUBLE,
1094 d_new_mat, data_cnts, data_offsets, MPI_DOUBLE,
1095 MPI_COMM_WORLD) == MPI_SUCCESS);
1098 delete [] data_offsets, data_cnts;
1100 d_alloc_size = new_size;
1102 d_num_rows = d_num_distributed_rows;
1104 d_distributed =
false;
1108 Matrix::calculateNumDistributedRows() {
1109 if (d_distributed && d_num_procs > 1) {
1110 int num_total_rows = d_num_rows;
1111 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1116 MPI_COMM_WORLD) == MPI_SUCCESS);
1117 d_num_distributed_rows = num_total_rows;
1120 d_num_distributed_rows = d_num_rows;
1128 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
1130 std::vector<int> row_offset(d_num_procs + 1);
1134 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE,
1140 MPI_COMM_WORLD) == MPI_SUCCESS);
1141 for (
int i = d_num_procs - 1; i >= 0; i--) {
1142 row_offset[i] = row_offset[i + 1] - row_offset[i];
1145 CAROM_VERIFY(row_offset[0] == 0);
1149 int nrow_blocks = d_num_procs;
1150 int ncol_blocks = 1;
1152 int blocksize = row_offset[d_num_procs] / d_num_procs;
1153 if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1;
1155 ncol_blocks, nrow_blocks,
numColumns(), blocksize);
1156 for (
int rank = 0; rank < d_num_procs; ++rank) {
1157 scatter_block(&slpk, 1, row_offset[rank] + 1,
1159 row_offset[rank + 1] - row_offset[rank], rank);
1163 qr_init(&QRmgr, &slpk);
1164 lqfactorize(&QRmgr);
1167 for (
int i = row_offset[myid]; i <
numColumns(); i++) {
1168 for (
int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) {
1169 QRmgr.A->mdata[j * QRmgr.A->mm + i] = 0;
1171 if (i < row_offset[myid + 1]) {
1172 QRmgr.A->mdata[(i - row_offset[myid]) * QRmgr.A->mm + i] = 1;
1178 Matrix* qr_factorized_matrix =
new Matrix(row_offset[myid + 1] -
1181 for (
int rank = 0; rank < d_num_procs; ++rank) {
1182 gather_block(&qr_factorized_matrix->
item(0, 0), QRmgr.A,
1183 1, row_offset[rank] + 1,
1184 numColumns(), row_offset[rank + 1] - row_offset[rank],
1188 free_matrix_data(&slpk);
1189 release_context(&slpk);
1194 return qr_factorized_matrix;
1199 int* row_pivot_owner,
1200 int pivots_requested)
const
1203 return qrcp_pivots_transpose_serial(row_pivot,
1208 return qrcp_pivots_transpose_distributed(row_pivot,
1215 Matrix::qrcp_pivots_transpose_serial(
int* row_pivot,
1216 int* row_pivot_owner,
1217 int pivots_requested)
const
1224 CAROM_VERIFY(pivots_requested <=
numRows());
1225 CAROM_VERIFY(pivots_requested > 0);
1229 CAROM_VERIFY(row_pivot != NULL);
1230 CAROM_VERIFY(row_pivot_owner != NULL);
1234 int num_cols_of_transpose =
numRows();
1248 int lwork = 20 * num_cols_of_transpose + 1;
1249 double* work =
new double[lwork];
1250 double* tau =
new double[std::min(num_rows_of_transpose,
1251 num_cols_of_transpose)];
1252 int* pivot =
new int[num_cols_of_transpose] ();
1260 dgeqp3(&num_rows_of_transpose,
1261 &num_cols_of_transpose,
1263 &num_rows_of_transpose,
1271 CAROM_VERIFY(info == 0);
1275 int is_mpi_initialized, is_mpi_finalized;
1276 CAROM_VERIFY(MPI_Initialized(&is_mpi_initialized) == MPI_SUCCESS);
1277 CAROM_VERIFY(MPI_Finalized(&is_mpi_finalized) == MPI_SUCCESS);
1279 if(is_mpi_initialized && !is_mpi_finalized) {
1280 const MPI_Comm my_comm = MPI_COMM_WORLD;
1281 CAROM_VERIFY(MPI_Comm_rank(my_comm, &my_rank) == MPI_SUCCESS);
1289 for (
int i = 0; i < pivots_requested; i++) {
1290 row_pivot[i] = pivot[i] - 1;
1291 row_pivot_owner[i] = my_rank;
1301 Matrix::qrcp_pivots_transpose_distributed(
int* row_pivot,
1302 int* row_pivot_owner,
1303 int pivots_requested)
1309 #ifdef CAROM_HAS_ELEMENTAL
1313 return qrcp_pivots_transpose_distributed_elemental
1314 (row_pivot, row_pivot_owner, pivots_requested);
1316 qrcp_pivots_transpose_distributed_scalapack
1317 (row_pivot, row_pivot_owner, pivots_requested);
1322 Matrix::qrcp_pivots_transpose_distributed_scalapack
1323 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
const
1328 int num_total_rows = d_num_rows;
1329 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1334 MPI_COMM_WORLD) == MPI_SUCCESS);
1336 int *row_offset =
new int[d_num_procs + 1];
1337 row_offset[d_num_procs] = num_total_rows;
1340 MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
1342 row_offset[my_rank] = d_num_rows;
1344 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE,
1350 MPI_COMM_WORLD) == MPI_SUCCESS);
1352 for (
int i = d_num_procs - 1; i >= 0; i--) {
1353 row_offset[i] = row_offset[i + 1] - row_offset[i];
1355 CAROM_VERIFY(row_offset[0] == 0);
1358 int blocksize = row_offset[d_num_procs] / d_num_procs;
1359 if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1;
1361 initialize_matrix(&slpk, d_num_cols, row_offset[d_num_procs], 1, d_num_procs, 1,
1364 CAROM_VERIFY(d_num_cols <=
1367 for (
int rank = 0; rank < d_num_procs; ++rank) {
1369 scatter_block(&slpk, 1, row_offset[rank]+1,
1371 d_num_cols, row_offset[rank+1] - row_offset[rank],
1376 qr_init(&QRmgr, &slpk);
1377 qrfactorize(&QRmgr);
1380 CAROM_VERIFY(0 < pivots_requested && pivots_requested <= QRmgr.ipivSize);
1381 CAROM_VERIFY(pivots_requested <= std::max(d_num_rows, d_num_cols));
1383 const int scount = std::max(0, std::min(pivots_requested,
1384 row_offset[my_rank+1]) - row_offset[my_rank]);
1385 int *mypivots = (scount > 0) ?
new int[scount] : NULL;
1387 for (
int i=0; i<scount; ++i)
1388 mypivots[i] = QRmgr.ipiv[i]-1;
1390 int *rcount = (my_rank == 0) ?
new int[d_num_procs] : NULL;
1391 int *rdisp = (my_rank == 0) ?
new int[d_num_procs] : NULL;
1393 MPI_Gather(&scount, 1, MPI_INT, rcount, 1, MPI_INT, 0, MPI_COMM_WORLD);
1398 for (
int i = 1; i<d_num_procs; ++i)
1399 rdisp[i] = rdisp[i-1] + rcount[i-1];
1401 CAROM_VERIFY(rdisp[d_num_procs-1] + rcount[d_num_procs-1] == pivots_requested);
1404 MPI_Gatherv(mypivots, scount, MPI_INT, row_pivot, rcount, rdisp, MPI_INT, 0,
1411 for (
int i=0; i<pivots_requested; ++i)
1413 row_pivot_owner[i] = -1;
1414 for (
int j=d_num_procs-1; j>=0; --j)
1416 if (row_offset[j] <= row_pivot[i])
1418 row_pivot_owner[i] = j;
1424 CAROM_VERIFY(row_pivot_owner[i] >= 0);
1425 CAROM_VERIFY(row_offset[row_pivot_owner[i]] <= row_pivot[i]
1426 && row_pivot[i] < row_offset[row_pivot_owner[i]+1]);
1431 for (
int i=0; i<scount; ++i)
1432 row_pivot[i] = QRmgr.ipiv[i]-1;
1435 free_matrix_data(&slpk);
1436 release_context(&slpk);
1443 delete [] row_offset;
1447 Matrix::qrcp_pivots_transpose_distributed_elemental
1448 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
1451 #ifdef CAROM_HAS_ELEMENTAL
1457 qrcp_pivots_transpose_distributed_elemental_balanced
1458 (row_pivot, row_pivot_owner, pivots_requested);
1461 qrcp_pivots_transpose_distributed_elemental_unbalanced
1462 (row_pivot, row_pivot_owner, pivots_requested);
1465 CAROM_VERIFY(
false);
1470 Matrix::qrcp_pivots_transpose_distributed_elemental_balanced
1471 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
1474 #ifdef CAROM_HAS_ELEMENTAL
1501 CAROM_VERIFY(row_pivot != NULL);
1502 CAROM_VERIFY(row_pivot_owner != NULL);
1505 const MPI_Comm comm = MPI_COMM_WORLD;
1506 const int master_rank = 0;
1508 int num_total_rows = d_num_rows;
1509 const int reduce_count = 1;
1510 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1515 comm) == MPI_SUCCESS);
1519 CAROM_VERIFY(pivots_requested <= num_total_rows);
1520 CAROM_VERIFY(pivots_requested > 0);
1528 const El::Grid grid(comm, 1);
1532 CAROM_VERIFY(El::mpi::Size(grid.RowComm()) == d_num_procs);
1537 El::Int height =
static_cast<El::Int
>(
numColumns());
1538 El::Int width =
static_cast<El::Int
>(
numRows());
1539 El::Int root =
static_cast<El::Int
>(master_rank);
1540 El::DistMatrix<double> scratch(height, width, grid, root);
1553 CAROM_VERIFY(scratch.RowOwner(0) == scratch.RowOwner(1));
1555 CAROM_VERIFY(MPI_Comm_rank(comm, &my_rank) == MPI_SUCCESS);
1556 El::Int rank_as_row =
static_cast<El::Int
>(my_rank);
1557 CAROM_VERIFY(scratch.ColOwner(rank_as_row) == rank_as_row);
1560 El::DistMatrix<double> householder_scalars(grid);
1561 El::DistMatrix<double> diagonal(grid);
1562 El::DistPermutation perm(grid);
1563 El::QRCtrl<double> ctrl;
1590 for (
int row = 0; row < d_num_rows; row++) {
1591 El::Int el_loc_col =
static_cast<El::Int
>(row);
1592 El::Int el_global_col = scratch.GlobalCol(el_loc_col);
1593 for (
int col = 0; col < d_num_cols; col++) {
1594 El::Int el_loc_row =
static_cast<El::Int
>(col);
1595 El::Int el_global_row = scratch.GlobalRow(el_loc_row);
1596 scratch.Set(el_global_row, el_global_col, this->
item(row, col));
1602 El::QR(scratch, householder_scalars, diagonal, perm);
1606 for (
size_t i = 0; i < pivots_requested; i++) {
1607 El::Int el_i =
static_cast<El::Int
>(i);
1608 El::Int el_perm_i = perm.Image(el_i);
1613 El::Int el_loc_i = scratch.LocalCol(el_perm_i);
1614 int loc_i =
static_cast<int>(el_loc_i);
1615 row_pivot[i] = loc_i;
1621 El::Int el_owner = scratch.ColOwner(el_perm_i);
1622 int owner =
static_cast<int>(el_owner);
1623 row_pivot_owner[i] = owner;
1626 CAROM_VERIFY(
false);
1631 Matrix::qrcp_pivots_transpose_distributed_elemental_unbalanced
1632 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
1635 #ifdef CAROM_HAS_ELEMENTAL
1655 CAROM_VERIFY(row_pivot != NULL);
1656 CAROM_VERIFY(row_pivot_owner != NULL);
1659 const MPI_Comm comm = MPI_COMM_WORLD;
1660 const int master_rank = 0;
1662 int num_total_rows = d_num_rows;
1663 const int reduce_count = 1;
1664 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1669 comm) == MPI_SUCCESS);
1673 CAROM_VERIFY(pivots_requested <= num_total_rows);
1674 CAROM_VERIFY(pivots_requested > 0);
1682 const El::Grid grid(comm, 1);
1686 CAROM_VERIFY(El::mpi::Size(grid.RowComm()) == d_num_procs);
1691 El::Int height =
static_cast<El::Int
>(
numColumns());
1692 El::Int width =
static_cast<El::Int
>(
numRows());
1693 El::Int root =
static_cast<El::Int
>(master_rank);
1694 El::DistMatrix<double> scratch(height, width, grid, root);
1707 CAROM_VERIFY(scratch.RowOwner(0) == scratch.RowOwner(1));
1709 CAROM_VERIFY(MPI_Comm_rank(comm, &my_rank) == MPI_SUCCESS);
1710 El::Int rank_as_row =
static_cast<El::Int
>(my_rank);
1711 CAROM_VERIFY(scratch.ColOwner(rank_as_row) == rank_as_row);
1714 El::DistMatrix<double> householder_scalars(grid);
1715 El::DistMatrix<double> diagonal(grid);
1716 El::DistPermutation perm(grid);
1717 El::QRCtrl<double> ctrl;
1734 int *row_offset =
new int[d_num_procs + 1];
1735 row_offset[d_num_procs + 1] = num_total_rows;
1737 row_offset[my_rank] = d_num_rows;
1738 const int send_recv_count = 1;
1739 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE,
1745 comm) == MPI_SUCCESS);
1747 for (
int i = d_num_procs - 1; i >= 0; i--) {
1748 row_offset[i] = row_offset[i + 1] - row_offset[i];
1750 CAROM_VERIFY(row_offset[0] == 0);
1754 for (
int row = row_offset[my_rank]; row < row_offset[my_rank + 1]; row++) {
1755 El::Int el_loc_col =
static_cast<El::Int
>(row - row_offset[my_rank]);
1756 El::Int el_global_col = scratch.GlobalCol(el_loc_col);
1757 for (
int col = 0; col < d_num_cols; col++) {
1758 El::Int el_loc_row =
static_cast<El::Int
>(col);
1759 El::Int el_global_row = scratch.GlobalRow(el_loc_row);
1760 scratch.Set(el_global_row, el_global_col, this->
item(row, col));
1766 El::QR(scratch, householder_scalars, diagonal, perm);
1770 for (
size_t i = 0; i < pivots_requested; i++) {
1771 El::Int el_i =
static_cast<El::Int
>(i);
1772 El::Int el_perm_i = perm.Image(el_i);
1778 int global_row_index =
static_cast<int>(el_perm_i);
1780 for (rank = 0; rank < d_num_procs; rank++) {
1781 bool is_at_or_above_lower_bound = (global_row_index >= row_offset[rank]);
1782 bool is_below_upper_bound = (global_row_index < row_offset[rank + 1]);
1783 if (is_at_or_above_lower_bound && is_below_upper_bound) {
1784 row_pivot_owner[i] = rank;
1792 row_pivot[i] = global_row_index - row_offset[rank];
1796 delete [] row_offset;
1798 CAROM_VERIFY(
false);
1805 int const num_passes = double_pass ? 2 : 1;
1807 for (
int work = 0; work < d_num_cols; ++work)
1810 for (
int k = 0; k < num_passes; k++)
1812 for (
int col = 0; col < work; ++col)
1814 double factor = 0.0;
1816 for (
int i = 0; i < d_num_rows; ++i)
1817 factor +=
item(i, col) *
item(i, work);
1819 if (d_distributed && d_num_procs > 1)
1821 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &factor, 1,
1822 MPI_DOUBLE, MPI_SUM,
1826 for (
int i = 0; i < d_num_rows; ++i)
1827 item(i, work) -= factor *
item(i, col);
1834 for (
int i = 0; i < d_num_rows; ++i)
1835 norm +=
item(i, work) *
item(i, work);
1837 if (d_distributed && d_num_procs > 1)
1839 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &norm, 1, MPI_DOUBLE,
1840 MPI_SUM, MPI_COMM_WORLD)
1843 if (norm > zero_tol)
1845 norm = 1.0 / sqrt(norm);
1846 for (
int i = 0; i < d_num_rows; ++i)
1847 item(i, work) *= norm;
1855 if (ncols == -1) ncols = d_num_cols;
1856 CAROM_VERIFY((ncols > 0) && (ncols <= d_num_cols));
1858 const int last_col = ncols - 1;
1860 int const num_passes = double_pass ? 2 : 1;
1863 for (
int k = 0; k < num_passes; k++)
1865 for (
int col = 0; col < last_col; ++col)
1867 double factor = 0.0;
1869 for (
int i = 0; i < d_num_rows; ++i)
1870 factor +=
item(i, col) *
item(i, last_col);
1872 if (d_distributed && d_num_procs > 1)
1874 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &factor, 1, MPI_DOUBLE,
1875 MPI_SUM, MPI_COMM_WORLD)
1878 for (
int i = 0; i < d_num_rows; ++i)
1879 item(i, last_col) -= factor *
item(i, col);
1886 for (
int i = 0; i < d_num_rows; ++i)
1887 norm +=
item(i, last_col) *
item(i, last_col);
1889 if (d_distributed && d_num_procs > 1)
1891 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &norm, 1, MPI_DOUBLE,
1892 MPI_SUM, MPI_COMM_WORLD) == MPI_SUCCESS );
1894 if (norm > zero_tol)
1896 norm = 1.0 / sqrt(norm);
1897 for (
int i = 0; i < d_num_rows; ++i)
1898 item(i, last_col) *= norm;
1910 for (
int i = 0; i < d_num_rows; i++)
1913 double row_max = fabs(
item(i, 0));
1914 for (
int j = 1; j < d_num_cols; j++)
1916 if (fabs(
item(i, j)) > row_max)
1917 row_max = fabs(
item(i, j));
1921 if (row_max > 1.0e-14)
1923 for (
int j = 0; j < d_num_cols; j++)
1924 item(i, j) /= row_max;
1937 double local_max[d_num_cols];
1938 for (
int j = 0; j < d_num_cols; j++)
1940 local_max[j] = fabs(
item(0, j));
1941 for (
int i = 1; i < d_num_rows; i++)
1943 if (fabs(
item(i, j)) > local_max[j])
1944 local_max[j] = fabs(
item(i, j));
1949 double global_max[d_num_cols];
1950 if (d_distributed && d_num_procs > 1)
1952 MPI_Allreduce(&local_max, &global_max, d_num_cols, MPI_DOUBLE, MPI_MAX,
1957 for (
int i = 0; i < d_num_cols; i++)
1958 global_max[i] = local_max[i];
1962 for (
int j = 0; j < d_num_cols; j++)
1964 if (global_max[j] > 1.0e-14)
1966 double tmp = 1.0 / global_max[j];
1967 for (
int i = 0; i < d_num_rows; i++)
1983 int result_num_rows = v.
dim();
1984 int result_num_cols;
1995 result_num_cols = w.
dim();
2005 int process_local_num_cols = w.
dim();
2006 MPI_Comm comm = MPI_COMM_WORLD;
2007 MPI_Datatype num_cols_datatype = MPI_INT;
2009 MPI_Comm_size(comm, &num_procs);
2010 std::vector<int> num_cols_on_proc(num_procs, 0);
2011 int num_procs_send_count = 1;
2012 int num_procs_recv_count = 1;
2013 MPI_Allgather(&process_local_num_cols,
2014 num_procs_send_count,
2016 num_cols_on_proc.data(),
2017 num_procs_recv_count,
2023 std::vector<int> gathered_w_displacements(num_procs, 0);
2024 result_num_cols = 0;
2025 for (
int i = 0; i < num_cols_on_proc.size(); i++)
2027 gathered_w_displacements.at(i) = result_num_cols;
2028 result_num_cols += num_cols_on_proc.at(i);
2034 std::vector<double> w_data(process_local_num_cols);
2035 for (
int i = 0; i < w_data.size(); i++)
2037 w_data.at(i) = w(i);
2040 std::vector<double> gathered_w_data(result_num_cols);
2041 MPI_Datatype entry_datatype = MPI_DOUBLE;
2042 MPI_Allgatherv(w_data.data(),
2043 process_local_num_cols,
2045 gathered_w_data.data(),
2046 num_cols_on_proc.data(),
2047 gathered_w_displacements.data(),
2051 gathered_w.
setSize(result_num_cols);
2052 for (
int i = 0; i < gathered_w_data.size(); i++)
2054 gathered_w(i) = gathered_w_data.at(i);
2059 Matrix result(result_num_rows, result_num_cols, is_distributed);
2062 for (
int i = 0; i < result_num_rows; i++)
2064 for (
int j = 0; j < result_num_cols; j++)
2066 result(i, j) = v(i) * gathered_w(j);
2075 const int resultNumRows = v.
dim();
2076 int resultNumColumns, processRowStartIndex, processNumColumns;
2080 if (
false == isDistributed)
2082 resultNumColumns = processNumColumns = resultNumRows;
2083 processRowStartIndex = 0;
2087 using sizeType = std::vector<int>::size_type;
2094 const MPI_Comm comm = MPI_COMM_WORLD;
2096 MPI_Comm_size(comm, &numProcesses);
2098 numRowsOnProcess(
static_cast<sizeType
>(numProcesses));
2100 const MPI_Datatype indexType = MPI_INT;
2101 MPI_Allgather(&resultNumRows, one, indexType,
2102 numRowsOnProcess.data(), one, indexType, comm);
2115 std::vector<int> rowStartIndexOnProcess(numProcesses);
2116 resultNumColumns = 0;
2117 for (sizeType i = 0; i < rowStartIndexOnProcess.size(); i++)
2119 rowStartIndexOnProcess.at(i) = resultNumColumns;
2120 resultNumColumns += numRowsOnProcess.at(i);
2123 MPI_Comm_rank(comm, &processNumber);
2124 processRowStartIndex =
2125 rowStartIndexOnProcess.at(
static_cast<sizeType
>(processNumber));
2132 Matrix diagonalMatrix(resultNumRows, resultNumColumns, isDistributed);
2133 for (
int i = 0; i < resultNumRows; i++)
2135 for (
int j = 0; j < resultNumColumns; j++)
2141 const double entry = (j == (i + processRowStartIndex)) ? v(i) : 0.0;
2142 diagonalMatrix(i, j) = entry;
2146 return diagonalMatrix;
2158 char jobz =
'V', uplo =
'U';
2161 int k = A->numColumns();
2162 int lwork = std::max(1, 10*k-1);
2163 double* work =
new double [lwork];
2164 double* eigs =
new double [k];
2165 Matrix* ev =
new Matrix(*A);
2169 for (
int row = 0; row < k; ++row) {
2170 for (
int col = row+1; col < k; ++col) {
2171 double tmp = ev->item(row, col);
2172 ev->item(row, col) = ev->item(col, row);
2173 ev->item(col, row) = tmp;
2178 dsyev(&jobz, &uplo, &k, ev->getData(), &k, eigs, work, &lwork, &info);
2182 for (
int row = 0; row < k; ++row) {
2183 for (
int col = row+1; col < k; ++col) {
2184 double tmp = ev->item(row, col);
2185 ev->item(row, col) = ev->item(col, row);
2186 ev->item(col, row) = tmp;
2190 EigenPair eigenpair;
2192 for (
int i = 0; i < k; i++)
2194 eigenpair.eigs.push_back(eigs[i]);
2205 char jobvl =
'N', jobrl =
'V';
2208 int k = A->numColumns();
2209 int lwork = std::max(k*k, 10*k);
2210 double* work =
new double [lwork];
2211 double* e_real =
new double [k];
2212 double* e_imaginary =
new double [k];
2213 double* ev_l = NULL;
2214 Matrix* ev_r =
new Matrix(k, k,
false);
2215 Matrix* A_copy =
new Matrix(*A);
2219 for (
int row = 0; row < k; ++row) {
2220 for (
int col = row+1; col < k; ++col) {
2221 double tmp = A_copy->item(row, col);
2222 A_copy->item(row, col) = A_copy->item(col, row);
2223 A_copy->item(col, row) = tmp;
2228 dgeev(&jobvl, &jobrl, &k, A_copy->getData(), &k, e_real, e_imaginary, ev_l,
2229 &k, ev_r->getData(), &k, work, &lwork, &info);
2233 for (
int row = 0; row < k; ++row) {
2234 for (
int col = row+1; col < k; ++col) {
2235 double tmp = ev_r->item(row, col);
2236 ev_r->item(row, col) = ev_r->item(col, row);
2237 ev_r->item(col, row) = tmp;
2241 ComplexEigenPair eigenpair;
2242 eigenpair.ev_real =
new Matrix(k, k,
false);
2243 eigenpair.ev_imaginary =
new Matrix(k, k,
false);
2246 for (
int i = 0; i < k; ++i)
2248 for (
int row = 0; row < k; ++row) {
2249 eigenpair.ev_real->item(row, i) = ev_r->item(row, i);
2251 if (e_imaginary[i] != 0)
2253 for (
int row = 0; row < k; ++row) {
2254 eigenpair.ev_real->item(row, i + 1) = ev_r->item(row, i);
2255 eigenpair.ev_imaginary->item(row, i) = ev_r->item(row, i + 1);
2256 eigenpair.ev_imaginary->item(row, i + 1) = -ev_r->item(row, i + 1);
2265 for (
int i = 0; i < k; i++)
2267 eigenpair.eigs.push_back(std::complex<double>(e_real[i], e_imaginary[i]));
2272 delete [] e_imaginary;
// NOTE(review): extraction-garbled fragment of
// SerialSVD(Matrix* A, Matrix* U, Vector* S, Matrix* V).  The signature, the
// column-major copy of A, and the leading arguments of the dgesdd call were
// lost; only comments are added here.  Consult the original source before
// editing.
// Allocate (or, presumably on an else branch, resize) U as m x min(m,n) for
// the left singular vectors -- TODO confirm the "if (U == NULL)" guard.
2291 U =
new Matrix(m, std::min(m, n),
false);
2296 U->
setSize(m, std::min(m, n));
// Same pattern for V, the min(m,n) x n right-singular-vector matrix ...
2301 V =
new Matrix(std::min(m, n), n,
false);
2305 V->
setSize(std::min(m, n), n);
// ... and for the singular-value vector S.
2310 S =
new Vector(n,
false);
// Workspace sizing for dgesdd; presumably matches the documented minimum
// LWORK for the job mode used -- TODO confirm against the LAPACK docs.
2321 int mn = std::min(m, n);
2322 int lwork = 4 * mn * mn + 7 * mn;
2323 double* work =
new double [lwork];
// NOTE(review): variable-length arrays are a compiler extension, not
// standard C++; prefer heap allocation of the 8*min(m,n) ints dgesdd needs.
2324 int iwork[8*std::min(m, n)];
// Tail of the dgesdd call (its leading arguments were lost in extraction).
2329 &ldv, work, &lwork, iwork, &info);
// Abort on any LAPACK failure.
2331 CAROM_VERIFY(info == 0);
// NOTE(review): fragment of the struct-returning convenience overload of
// SerialSVD.  The body that invokes the pointer-based SerialSVD and fills in
// the decomposition's U/S/V members was lost in extraction.
2337 struct SerialSVDDecomposition
SerialSVD(Matrix* A)
// Serial-only routine: the input matrix must not be distributed.
2339 CAROM_VERIFY(!A->distributed());
2346 struct SerialSVDDecomposition decomp;
// NOTE(review): extraction-garbled fragment of a space-time product routine.
// The head of the signature, the declarations of nrows/ncols and the output
// matrix p, and the function tail were lost; only comments are added here.
// Trailing parameters: optional per-timestep scaling and flags selecting
// temporal offsets for the A and B temporal bases.
2358 const std::vector<double> *tscale,
2359 const bool At0at0,
const bool Bt0at0,
const bool lagB,
// Temporal offsets: optionally skip the t=0 sample of At and/or Bt, and
// optionally lag B by one extra step.
2365 const int AtOS = At0at0 ? 1 : 0;
2366 const int BtOS0 = Bt0at0 ? 1 : 0;
2367 const int BtOS = BtOS0 + (lagB ? 1 : 0);
// Spatial dimension and (offset-adjusted) number of time samples; the A and
// B bases must agree on both.
2372 const int nspace = As->
numRows();
2373 const int ntime = At->
numRows() + AtOS;
2375 CAROM_VERIFY(nspace == Bs->
numRows() && ntime == Bt->
numRows() + BtOS0);
// First time index included in the sum: the largest offset, or 1 if skip0
// forces the initial step to be dropped.
2381 const int k0AB = std::max(AtOS, BtOS);
2382 const int k00 = skip0 ? 1 : 0;
2383 const int k0 = std::max(k0AB, k00);
// If a time scaling is supplied it must cover every summed step.
2385 CAROM_VERIFY(tscale == NULL || (tscale->size() == ntime-1 && k0 > 0));
// p(i,j) = sum_k ts_k * At(k-AtOS,i) * Bt(k-BtOS,j) * sum_m As(m,i)*Bs(m,j)
2389 for (
int i=0; i<nrows; ++i)
2391 for (
int j=0; j<ncols; ++j)
2395 for (
int k=k0; k<ntime; ++k)
2399 const double At_k = At->
item(k - AtOS,i);
2400 const double Bt_k = Bt->
item(k - BtOS,j);
2401 const double ts = (tscale == NULL) ? 1.0 : (*tscale)[k - 1];
// Spatial inner product of basis columns i and j.
// NOTE(review): spij does not depend on k and could be hoisted out of the
// k loop to avoid recomputing it ntime-k0 times -- confirm against the
// original source before changing.
2404 for (
int m=0; m<nspace; ++m)
2405 spij += As->
item(m,i) * Bs->
item(m,j);
2407 pij += spij * ts * At_k * Bt_k;
2410 p->item(i, j) = pij;
void getInteger(const std::string &key, int &data)
Reads an integer associated with the supplied key from the currently open database file.
void putInteger(const std::string &key, int data)
Writes an integer associated with the supplied key to the currently open database file.
virtual bool create(const std::string &file_name, const MPI_Comm comm=MPI_COMM_NULL) override
Creates a new HDF5 database file with the supplied name.
virtual bool open(const std::string &file_name, const std::string &type, const MPI_Comm comm=MPI_COMM_NULL) override
Opens an existing HDF5 database file with the supplied name.
virtual void getDoubleArray(const std::string &key, double *data, int nelements, const bool distributed=false)
Reads an array of doubles associated with the supplied key from the currently open HDF5 database file...
virtual void putDoubleArray(const std::string &key, const double *const data, int nelements, const bool distributed=false)
Writes an array of doubles associated with the supplied key to the currently open HDF5 database file.
virtual bool close()
Closes the currently open HDF5 database file.
void local_read(const std::string &base_file_name, int rank)
Read a single rank of a distributed Matrix from (a) HDF file(s).
void setSize(int num_rows, int num_cols)
Sets the number of rows and columns of the matrix and reallocates storage if needed....
Matrix * getFirstNColumns(int n) const
Get the first N columns of a matrix.
double * getData() const
Get the matrix data as a pointer.
bool balanced() const
Returns true if rows of matrix are load-balanced.
Matrix * inverse() const
Computes and returns the inverse of this.
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
void rescale_rows_max()
Rescale every matrix row by its maximum absolute value.
Matrix & operator=(const Matrix &rhs)
Assignment operator.
void write(const std::string &base_file_name) const
write Matrix into (a) HDF file(s).
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
void distribute(const int &local_num_rows)
Distribute this matrix rows among MPI processes, based on the specified local number of rows....
void gather()
Gather all the distributed rows among MPI processes. This becomes not distributed after this function...
Matrix * elementwise_mult(const Matrix &other) const
Multiplies two matrices element-wise.
void orthogonalize_last(int ncols=-1, bool double_pass=false, double zero_tol=1.0e-15)
Orthonormalizes the matrix's last column, assuming the previous columns are already orthonormal.
Matrix & operator+=(const Matrix &rhs)
Addition operator.
int numDistributedRows() const
Returns the number of rows of the Matrix across all processors.
Vector * getColumn(int column) const
Returns a column of the matrix (not owned by Matrix).
Matrix & operator-=(const Matrix &rhs)
Subtraction operator.
void orthogonalize(bool double_pass=false, double zero_tol=1.0e-15)
Orthonormalizes the matrix.
void multPlus(Vector &a, const Vector &b, double c) const
Computes a += this*b*c.
void print(const char *prefix) const
print Matrix into (a) ascii file(s).
Matrix * transposeMult(const Matrix &other) const
Multiplies the transpose of this Matrix with other and returns the product, reference version.
void pointwise_mult(int this_row, const Vector &other, Vector &result) const
Multiplies a specified row of this Matrix with other pointwise.
Matrix * mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product, reference version.
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
int numRows() const
Returns the number of rows of the Matrix on this processor.
void read(const std::string &base_file_name)
Read a Matrix from (a) HDF file(s).
Matrix * elementwise_square() const
Square every element in the matrix.
void transpose()
Replaces this Matrix with its transpose (in place), in the serial square case only.
Matrix * qr_factorize() const
Computes and returns the Q of the QR factorization of this.
void rescale_cols_max()
Rescale every matrix column by its maximum absolute value.
void qrcp_pivots_transpose(int *row_pivot, int *row_pivot_owner, int pivots_requested) const
Compute the leading numColumns() column pivots from a QR decomposition with column pivots (QRCP) of t...
double * getData() const
Get the vector data as a pointer.
void setSize(int dim)
Sets the length of the vector and reallocates storage if needed. All values are initialized to zero.
bool distributed() const
Returns true if the Vector is distributed.
const double & item(int i) const
Const Vector member access.
int dim() const
Returns the dimension of the Vector on this processor.
Matrix IdentityMatrixFactory(const Vector &v)
Factory function to make an identity matrix with rows distributed in the same fashion as its Vector argument.
struct ComplexEigenPair NonSymmetricRightEigenSolve(Matrix *A)
Computes the eigenvectors/eigenvalues of an NxN real nonsymmetric matrix.
struct EigenPair SymmetricRightEigenSolve(Matrix *A)
Computes the eigenvectors/eigenvalues of an NxN real symmetric matrix.
Matrix outerProduct(const Vector &v, const Vector &w)
Computes the outer product of two Vectors, v and w.
void SerialSVD(Matrix *A, Matrix *U, Vector *S, Matrix *V)
Computes the SVD of a undistributed matrix in serial.
Matrix DiagonalMatrixFactory(const Vector &v)
Factory function to make a diagonal matrix with nonzero entries as in its Vector argument....
int get_global_offsets(const int local_dim, std::vector< int > &offsets, const MPI_Comm &comm)
Save integer offsets for each MPI rank under MPI communicator comm, where each rank has the size of local_dim.