16 #include "utils/HDFDatabase.h"
17 #include "utils/mpi_utils.h"
24 #ifdef CAROM_HAS_ELEMENTAL
28 #include "scalapack_wrapper.h"
31 #define dsyev CAROM_FC_GLOBAL(dsyev, DSYEV)
32 #define dgeev CAROM_FC_GLOBAL(dgeev, DGEEV)
33 #define dgetrf CAROM_FC_GLOBAL(dgetrf, DGETRF)
34 #define dgetri CAROM_FC_GLOBAL(dgetri, DGETRI)
35 #define dgeqp3 CAROM_FC_GLOBAL(dgeqp3, DGEQP3)
36 #define dgesdd CAROM_FC_GLOBAL(dgesdd, DGESDD)
40 void dsyev(
char*,
char*,
int*,
double*,
int*,
double*,
double*,
int*,
int*);
43 void dgeev(
char*,
char*,
int*,
double*,
int*,
double*,
double*,
double*,
int*,
44 double*,
int*,
double*,
int*,
int*);
47 void dgetrf(
int*,
int*,
double*,
int*,
int*,
int*);
50 void dgetri(
int*,
double*,
int*,
int*,
double*,
int*,
int*);
53 void dgeqp3(
int*,
int*,
double*,
int*,
int*,
double*,
double*,
int*,
int*);
56 void dgesdd(
char*,
int*,
int*,
double*,
int*,
57 double*,
double*,
int*,
double*,
int*,
58 double*,
int*,
int*,
int*);
77 d_distributed(distributed),
80 CAROM_VERIFY(num_rows > 0);
81 CAROM_VERIFY(num_cols > 0);
83 MPI_Initialized(&mpi_init);
85 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
86 MPI_Comm_rank(MPI_COMM_WORLD, &d_rank);
94 std::default_random_engine generator;
95 std::normal_distribution<double> normal_distribution(0.0, 1.0);
96 for (
int i = 0; i < num_rows; i++) {
97 for (
int j = 0; j < num_cols; j++) {
98 item(i,j) = normal_distribution(generator);
112 d_distributed(distributed),
113 d_owns_data(copy_data)
115 CAROM_VERIFY(mat != 0);
116 CAROM_VERIFY(num_rows > 0);
117 CAROM_VERIFY(num_cols > 0);
119 MPI_Initialized(&mpi_init);
121 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
122 MPI_Comm_rank(MPI_COMM_WORLD, &d_rank);
130 memcpy(d_mat, mat, d_alloc_size*
sizeof(
double));
134 if (num_rows > INT_MAX / num_cols)
135 CAROM_ERROR(
"Matrix::Matrix- new size exceeds maximum integer value!\n");
138 d_alloc_size = num_rows*num_cols;
139 d_num_cols = num_cols;
140 d_num_rows = num_rows;
142 calculateNumDistributedRows();
151 d_distributed(other.d_distributed),
155 MPI_Initialized(&mpi_init);
157 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
158 MPI_Comm_rank(MPI_COMM_WORLD, &d_rank);
164 setSize(other.d_num_rows, other.d_num_cols);
165 memcpy(d_mat, other.d_mat, d_alloc_size*
sizeof(
double));
170 if (d_owns_data && d_mat) {
179 d_distributed = rhs.d_distributed;
180 d_num_procs = rhs.d_num_procs;
181 setSize(rhs.d_num_rows, rhs.d_num_cols);
182 memcpy(d_mat, rhs.d_mat, d_num_rows*d_num_cols*
sizeof(
double));
190 CAROM_VERIFY(rhs.d_num_rows == d_num_rows);
191 CAROM_VERIFY(rhs.d_num_cols == d_num_cols);
192 for(
int i=0; i<d_num_rows*d_num_cols; ++i) d_mat[i] += rhs.d_mat[i];
200 CAROM_VERIFY(rhs.d_num_rows == d_num_rows);
201 CAROM_VERIFY(rhs.d_num_cols == d_num_cols);
202 for(
int i=0; i<d_num_rows*d_num_cols; ++i) d_mat[i] -= rhs.d_mat[i];
222 const MPI_Comm comm = MPI_COMM_WORLD;
227 const int first_rank_with_fewer = num_total_rows % d_num_procs;
229 CAROM_VERIFY(MPI_Comm_rank(comm, &my_rank) == MPI_SUCCESS);
231 const int min_rows_per_rank = num_total_rows / d_num_procs;
232 const bool has_extra_row = my_rank < first_rank_with_fewer;
233 const int max_rows_on_rank = min_rows_per_rank + has_extra_row;
234 const bool has_enough_rows = (d_num_rows >= min_rows_per_rank);
235 const bool has_too_many_rows = (d_num_rows > max_rows_on_rank);
237 int result = (has_enough_rows && !has_too_many_rows);
238 const int reduce_count = 1;
239 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
244 comm) == MPI_SUCCESS);
253 for(
int i=0; i<d_num_rows*d_num_cols; ++i) {
259 std::unique_ptr<Matrix>
262 CAROM_VERIFY(n > 0 && n <= d_num_cols);
264 Matrix* first_n_columns =
new Matrix(d_num_rows, n, d_distributed);
266 return std::unique_ptr<Matrix>(first_n_columns);
275 CAROM_VERIFY(n > 0 && n <= d_num_cols);
280 for (
int i = 0; i < d_num_rows; i++)
282 for (
int j = 0; j < n; j++)
299 result.
setSize(d_num_rows, other.d_num_cols);
302 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
303 for (
int other_col = 0; other_col < other.d_num_cols; ++other_col) {
304 double result_val = 0.0;
305 for (
int entry = 0; entry < d_num_cols; ++entry) {
306 result_val +=
item(this_row, entry)*other.
item(entry, other_col);
308 result.
item(this_row, other_col) = result_val;
326 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
327 double result_val = 0.0;
328 for (
int entry = 0; entry < d_num_cols; ++entry) {
329 result_val +=
item(this_row, entry)*other.
item(entry);
331 result.
item(this_row) = result_val;
348 for (
int entry = 0; entry < d_num_cols; ++entry) {
349 result.
item(entry) =
item(this_row, entry)*other.
item(entry);
363 for (
int entry = 0; entry < d_num_cols; ++entry) {
364 other.
item(entry) *=
item(this_row, entry);
379 result.
setSize(d_num_rows, other.d_num_cols);
382 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
383 for (
int other_col = 0; other_col < d_num_cols; ++other_col) {
384 result.
item(this_row, other_col) =
item(this_row,
385 other_col) * other.
item(this_row, other_col);
395 result.
setSize(d_num_rows, d_num_cols);
398 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
399 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
400 const double a =
item(this_row, this_col);
401 result.
item(this_row, this_col) = a * a;
417 for (
int this_row = 0; this_row < d_num_rows; ++this_row) {
419 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
420 tmp +=
item(this_row, this_col)*b.
item(this_col);
422 a.
item(this_row) += tmp*c;
436 result.
setSize(d_num_cols, other.d_num_cols);
439 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
440 for (
int other_col = 0; other_col < other.d_num_cols; ++other_col) {
441 double result_val = 0.0;
442 for (
int entry = 0; entry < d_num_rows; ++entry) {
443 result_val +=
item(entry, this_col)*other.
item(entry, other_col);
445 result.
item(this_col, other_col) = result_val;
448 if (d_distributed && d_num_procs > 1) {
449 int new_mat_size = d_num_cols*other.d_num_cols;
450 MPI_Allreduce(MPI_IN_PLACE,
473 for (
int this_col = 0; this_col < d_num_cols; ++this_col) {
474 double result_val = 0.0;
475 for (
int entry = 0; entry < d_num_rows; ++entry) {
476 result_val +=
item(entry, this_col)*other.
item(entry);
478 result.
item(this_col) = result_val;
480 if (d_distributed && d_num_procs > 1) {
481 MPI_Allreduce(MPI_IN_PLACE,
495 for (
int i = 0; i < d_num_rows; i++) {
510 result.
setSize(d_num_rows, d_num_cols);
515 int mtx_size = d_num_rows;
516 int lwork = mtx_size*mtx_size;
517 int* ipiv =
new int [mtx_size];
518 double* work =
new double [lwork];
521 for (
int row = 0; row < mtx_size; ++row) {
522 for (
int col = 0; col < mtx_size; ++col) {
523 result.
item(col, row) =
item(row, col);
527 dgetrf(&mtx_size, &mtx_size, result.d_mat, &mtx_size, ipiv, &info);
528 CAROM_VERIFY(info == 0);
530 dgetri(&mtx_size, result.d_mat, &mtx_size, ipiv, work, &lwork, &info);
531 CAROM_VERIFY(info == 0);
535 for (
int row = 0; row < mtx_size; ++row) {
536 for (
int col = row+1; col < mtx_size; ++col) {
537 double tmp = result.
item(row, col);
538 result.
item(row, col) = result.
item(col, row);
539 result.
item(col, row) = tmp;
551 for (
int i=0; i<n-1; ++i)
553 for (
int j=i+1; j<n; ++j)
555 const double t = d_mat[i*n+j];
556 d_mat[i*n+j] = d_mat[j*n+i];
571 int mtx_size = d_num_rows;
572 int lwork = mtx_size*mtx_size;
573 int* ipiv =
new int [mtx_size];
574 double* work =
new double [lwork];
577 for (
int row = 0; row < mtx_size; ++row) {
578 for (
int col = row+1; col < mtx_size; ++col) {
579 double tmp =
item(row, col);
581 item(col, row) = tmp;
585 dgetrf(&mtx_size, &mtx_size, d_mat, &mtx_size, ipiv, &info);
586 CAROM_VERIFY(info == 0);
588 dgetri(&mtx_size, d_mat, &mtx_size, ipiv, work, &lwork, &info);
589 CAROM_VERIFY(info == 0);
593 for (
int row = 0; row < mtx_size; ++row) {
594 for (
int col = row+1; col < mtx_size; ++col) {
595 double tmp =
item(row, col);
597 item(col, row) = tmp;
624 for (
int i=0; i<
numRows(); ++i)
642 const bool success = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
643 CAROM_ASSERT(success);
645 std::string filename_str = prefix + std::to_string(my_rank);
646 const char * filename = filename_str.c_str();
647 FILE * pFile = fopen(filename,
"w");
648 for (
int row = 0; row < d_num_rows; ++row) {
649 for (
int col = 0; col < d_num_cols; ++col) {
650 fprintf(pFile,
" %25.20e\t",
item(row,col));
652 fprintf(pFile,
"\n");
660 CAROM_VERIFY(!base_file_name.empty());
663 MPI_Initialized(&mpi_init);
666 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
673 sprintf(tmp,
".%06d", rank);
674 std::string full_file_name = base_file_name + tmp;
676 database.
create(full_file_name);
678 sprintf(tmp,
"distributed");
680 sprintf(tmp,
"num_rows");
682 sprintf(tmp,
"num_cols");
684 sprintf(tmp,
"data");
692 CAROM_VERIFY(!base_file_name.empty());
695 MPI_Initialized(&mpi_init);
698 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
699 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
707 sprintf(tmp,
".%06d", rank);
708 std::string full_file_name = base_file_name + tmp;
710 database.
open(full_file_name,
"r");
712 sprintf(tmp,
"distributed");
717 sprintf(tmp,
"num_rows");
720 sprintf(tmp,
"num_cols");
723 sprintf(tmp,
"data");
732 CAROM_VERIFY(!base_file_name.empty());
735 MPI_Initialized(&mpi_init);
737 MPI_Comm_size(MPI_COMM_WORLD, &d_num_procs);
744 sprintf(tmp,
".%06d", rank);
745 std::string full_file_name = base_file_name + tmp;
747 database.
open(full_file_name,
"r");
749 sprintf(tmp,
"distributed");
754 sprintf(tmp,
"num_rows");
757 sprintf(tmp,
"num_cols");
760 sprintf(tmp,
"data");
770 CAROM_VERIFY(d_owns_data);
772 std::vector<int> row_offsets;
775 CAROM_VERIFY(num_total_rows == d_num_rows);
776 int local_offset = row_offsets[d_rank] * d_num_cols;
777 const int new_size = local_num_rows * d_num_cols;
779 double *d_new_mat =
new double [new_size];
781 memcpy(d_new_mat, &d_mat[local_offset], 8 * new_size);
785 d_alloc_size = new_size;
787 d_num_distributed_rows = d_num_rows;
788 d_num_rows = local_num_rows;
790 d_distributed =
true;
797 CAROM_VERIFY(d_owns_data);
799 std::vector<int> row_offsets;
802 CAROM_VERIFY(num_total_rows == d_num_distributed_rows);
803 const int new_size = d_num_distributed_rows * d_num_cols;
805 int *data_offsets =
new int[row_offsets.size() - 1];
806 int *data_cnts =
new int[row_offsets.size() - 1];
807 for (
int k = 0; k < row_offsets.size() - 1; k++)
809 data_offsets[k] = row_offsets[k] * d_num_cols;
810 data_cnts[k] = (row_offsets[k+1] - row_offsets[k]) * d_num_cols;
813 double *d_new_mat =
new double [new_size] {0.0};
814 CAROM_VERIFY(MPI_Allgatherv(d_mat, d_num_rows * d_num_cols, MPI_DOUBLE,
815 d_new_mat, data_cnts, data_offsets, MPI_DOUBLE,
816 MPI_COMM_WORLD) == MPI_SUCCESS);
819 delete [] data_offsets;
822 d_alloc_size = new_size;
824 d_num_rows = d_num_distributed_rows;
826 d_distributed =
false;
830 Matrix::calculateNumDistributedRows() {
831 if (d_distributed && d_num_procs > 1) {
832 int num_total_rows = d_num_rows;
833 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
838 MPI_COMM_WORLD) == MPI_SUCCESS);
839 d_num_distributed_rows = num_total_rows;
842 d_num_distributed_rows = d_num_rows;
848 std::vector<Matrix*> QR;
850 return std::unique_ptr<Matrix>(QR[0]);
855 std::vector<Matrix*> QRraw;
858 for (
int i=0; i<2; ++i)
859 QR.push_back(std::unique_ptr<Matrix>(QRraw[i]));
863 std::vector<Matrix*> & QR)
const
865 const int myid = d_rank;
868 std::vector<int> row_offset(d_num_procs + 1);
872 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE,
878 MPI_COMM_WORLD) == MPI_SUCCESS);
879 for (
int i = d_num_procs - 1; i >= 0; i--) {
880 row_offset[i] = row_offset[i + 1] - row_offset[i];
883 CAROM_VERIFY(row_offset[0] == 0);
887 int nrow_blocks = d_num_procs;
890 int blocksize = row_offset[d_num_procs] / d_num_procs;
891 if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1;
893 ncol_blocks, nrow_blocks, ncols, blocksize);
894 for (
int rank = 0; rank < d_num_procs; ++rank) {
895 scatter_block(&slpk, 1, row_offset[rank] + 1,
897 row_offset[rank + 1] - row_offset[rank], rank);
901 qr_init(&QRmgr, &slpk);
904 Matrix *R_matrix =
nullptr;
908 Matrix* L_dist_matrix =
new Matrix(row_offset[myid + 1] - row_offset[myid],
911 for (
int rank = 0; rank < d_num_procs; ++rank) {
912 gather_block(&L_dist_matrix->item(0, 0), QRmgr.A, 1,
913 row_offset[rank] + 1, ncols,
914 row_offset[rank + 1] - row_offset[rank], rank);
917 R_matrix =
new Matrix(ncols, ncols,
false);
920 const int numLocalRowsR = std::min(ncols, row_offset[myid + 1]);
921 for (
int i=0; i<numLocalRowsR; ++i)
923 for (
int j=i; j<ncols; ++j)
924 (*R_matrix)(i,j) = (*L_dist_matrix)(i,j);
926 for (
int j=0; j<i; ++j)
927 (*R_matrix)(i,j) = 0.0;
932 const int maxLocalRowsR = std::min(ncols, row_offset[myid + 1]);
933 const int numLocalRowsR = std::max(0, maxLocalRowsR - row_offset[myid]);
935 std::vector<int> allNumRowsR(d_num_procs);
936 MPI_Gather(&numLocalRowsR, 1, MPI_INT, allNumRowsR.data(), 1, MPI_INT,
939 std::vector<double> localRowsR;
943 localRowsR.resize(numLocalRowsR * ncols);
944 for (
int i=0; i<numLocalRowsR; ++i)
946 const int row = row_offset[myid] + i;
947 for (
int j=row; j<ncols; ++j)
948 localRowsR[(i * ncols) + j] = (*L_dist_matrix)(i,j);
950 for (
int j=0; j<row; ++j)
951 localRowsR[(i * ncols) + j] = 0.0;
955 localRowsR.resize(1);
957 delete L_dist_matrix;
959 std::vector<double> recvRowsR;
961 recvRowsR.resize((ncols - numLocalRowsR) * ncols);
963 if (recvRowsR.size() == 0)
966 std::vector<int> disp(d_num_procs);
969 for (
int i=1; i<d_num_procs; ++i)
971 allNumRowsR[i] *= ncols;
972 disp[i] = disp[i - 1] + allNumRowsR[i-1];
975 MPI_Gatherv(localRowsR.data(), myid == 0 ? 0 : numLocalRowsR * ncols,
976 MPI_DOUBLE, recvRowsR.data(),
977 allNumRowsR.data(), disp.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
981 for (
int i=numLocalRowsR; i<ncols; ++i)
983 for (
int j=0; j<ncols; ++j)
984 (*R_matrix)(i,j) = recvRowsR[(i - numLocalRowsR) * ncols + j];
988 MPI_Bcast(R_matrix->getData(), ncols * ncols, MPI_DOUBLE, 0,
993 for (
int i = row_offset[myid]; i < ncols; i++) {
994 for (
int j = 0; j < i - row_offset[myid] && j < row_offset[myid + 1]; j++) {
995 QRmgr.A->mdata[j * QRmgr.A->mm + i] = 0;
997 if (i < row_offset[myid + 1]) {
998 QRmgr.A->mdata[(i - row_offset[myid]) * QRmgr.A->mm + i] = 1;
1004 Matrix *qr_factorized_matrix =
new Matrix(row_offset[myid + 1] -
1008 for (
int rank = 0; rank < d_num_procs; ++rank) {
1009 gather_block(&qr_factorized_matrix->item(0, 0), QRmgr.A, 1,
1010 row_offset[rank] + 1, ncols,
1011 row_offset[rank + 1] - row_offset[rank], rank);
1014 free_matrix_data(&slpk);
1015 release_context(&slpk);
1021 QR[0] = qr_factorized_matrix;
1027 int* row_pivot_owner,
1028 int pivots_requested)
const
1031 return qrcp_pivots_transpose_serial(row_pivot,
1036 return qrcp_pivots_transpose_distributed(row_pivot,
1043 Matrix::qrcp_pivots_transpose_serial(
int* row_pivot,
1044 int* row_pivot_owner,
1045 int pivots_requested)
const
1052 CAROM_VERIFY(pivots_requested <=
numRows());
1053 CAROM_VERIFY(pivots_requested > 0);
1057 CAROM_VERIFY(row_pivot != NULL);
1058 CAROM_VERIFY(row_pivot_owner != NULL);
1062 int num_cols_of_transpose =
numRows();
1076 int lwork = 20 * num_cols_of_transpose + 1;
1077 double* work =
new double[lwork];
1078 double* tau =
new double[std::min(num_rows_of_transpose,
1079 num_cols_of_transpose)];
1080 int* pivot =
new int[num_cols_of_transpose] ();
1088 dgeqp3(&num_rows_of_transpose,
1089 &num_cols_of_transpose,
1091 &num_rows_of_transpose,
1099 CAROM_VERIFY(info == 0);
1103 int is_mpi_initialized, is_mpi_finalized;
1104 CAROM_VERIFY(MPI_Initialized(&is_mpi_initialized) == MPI_SUCCESS);
1105 CAROM_VERIFY(MPI_Finalized(&is_mpi_finalized) == MPI_SUCCESS);
1107 if(is_mpi_initialized && !is_mpi_finalized) {
1108 const MPI_Comm my_comm = MPI_COMM_WORLD;
1109 CAROM_VERIFY(MPI_Comm_rank(my_comm, &my_rank) == MPI_SUCCESS);
1117 for (
int i = 0; i < pivots_requested; i++) {
1118 row_pivot[i] = pivot[i] - 1;
1119 row_pivot_owner[i] = my_rank;
1129 Matrix::qrcp_pivots_transpose_distributed(
int* row_pivot,
1130 int* row_pivot_owner,
1131 int pivots_requested)
1137 #ifdef CAROM_HAS_ELEMENTAL
1141 return qrcp_pivots_transpose_distributed_elemental
1142 (row_pivot, row_pivot_owner, pivots_requested);
1144 qrcp_pivots_transpose_distributed_scalapack
1145 (row_pivot, row_pivot_owner, pivots_requested);
1150 Matrix::qrcp_pivots_transpose_distributed_scalapack
1151 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
const
1156 int num_total_rows = d_num_rows;
1157 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1162 MPI_COMM_WORLD) == MPI_SUCCESS);
1164 int *row_offset =
new int[d_num_procs + 1];
1165 row_offset[d_num_procs] = num_total_rows;
1168 MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
1170 row_offset[my_rank] = d_num_rows;
1172 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE,
1178 MPI_COMM_WORLD) == MPI_SUCCESS);
1180 for (
int i = d_num_procs - 1; i >= 0; i--) {
1181 row_offset[i] = row_offset[i + 1] - row_offset[i];
1183 CAROM_VERIFY(row_offset[0] == 0);
1186 int blocksize = row_offset[d_num_procs] / d_num_procs;
1187 if (row_offset[d_num_procs] % d_num_procs != 0) blocksize += 1;
1189 initialize_matrix(&slpk, d_num_cols, row_offset[d_num_procs], 1, d_num_procs, 1,
1192 CAROM_VERIFY(d_num_cols <=
1195 for (
int rank = 0; rank < d_num_procs; ++rank) {
1197 scatter_block(&slpk, 1, row_offset[rank]+1,
1199 d_num_cols, row_offset[rank+1] - row_offset[rank],
1204 qr_init(&QRmgr, &slpk);
1205 qrfactorize(&QRmgr);
1208 CAROM_VERIFY(0 < pivots_requested && pivots_requested <= QRmgr.ipivSize);
1209 CAROM_VERIFY(pivots_requested <= std::max(d_num_rows, d_num_cols));
1211 const int scount = std::max(0, std::min(pivots_requested,
1212 row_offset[my_rank+1]) - row_offset[my_rank]);
1213 int *mypivots = (scount > 0) ?
new int[scount] : NULL;
1215 for (
int i=0; i<scount; ++i)
1216 mypivots[i] = QRmgr.ipiv[i]-1;
1218 int *rcount = (my_rank == 0) ?
new int[d_num_procs] : NULL;
1219 int *rdisp = (my_rank == 0) ?
new int[d_num_procs] : NULL;
1221 MPI_Gather(&scount, 1, MPI_INT, rcount, 1, MPI_INT, 0, MPI_COMM_WORLD);
1226 for (
int i = 1; i<d_num_procs; ++i)
1227 rdisp[i] = rdisp[i-1] + rcount[i-1];
1229 CAROM_VERIFY(rdisp[d_num_procs-1] + rcount[d_num_procs-1] == pivots_requested);
1232 MPI_Gatherv(mypivots, scount, MPI_INT, row_pivot, rcount, rdisp, MPI_INT, 0,
1239 for (
int i=0; i<pivots_requested; ++i)
1241 row_pivot_owner[i] = -1;
1242 for (
int j=d_num_procs-1; j>=0; --j)
1244 if (row_offset[j] <= row_pivot[i])
1246 row_pivot_owner[i] = j;
1252 CAROM_VERIFY(row_pivot_owner[i] >= 0);
1253 CAROM_VERIFY(row_offset[row_pivot_owner[i]] <= row_pivot[i]
1254 && row_pivot[i] < row_offset[row_pivot_owner[i]+1]);
1259 for (
int i=0; i<scount; ++i)
1260 row_pivot[i] = QRmgr.ipiv[i]-1;
1263 free_matrix_data(&slpk);
1264 release_context(&slpk);
1271 delete [] row_offset;
1275 Matrix::qrcp_pivots_transpose_distributed_elemental
1276 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
1279 #ifdef CAROM_HAS_ELEMENTAL
1285 qrcp_pivots_transpose_distributed_elemental_balanced
1286 (row_pivot, row_pivot_owner, pivots_requested);
1289 qrcp_pivots_transpose_distributed_elemental_unbalanced
1290 (row_pivot, row_pivot_owner, pivots_requested);
1293 CAROM_VERIFY(
false);
1298 Matrix::qrcp_pivots_transpose_distributed_elemental_balanced
1299 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
1302 #ifdef CAROM_HAS_ELEMENTAL
1329 CAROM_VERIFY(row_pivot != NULL);
1330 CAROM_VERIFY(row_pivot_owner != NULL);
1333 const MPI_Comm comm = MPI_COMM_WORLD;
1334 const int master_rank = 0;
1336 int num_total_rows = d_num_rows;
1337 const int reduce_count = 1;
1338 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1343 comm) == MPI_SUCCESS);
1347 CAROM_VERIFY(pivots_requested <= num_total_rows);
1348 CAROM_VERIFY(pivots_requested > 0);
1356 const El::Grid grid(comm, 1);
1360 CAROM_VERIFY(El::mpi::Size(grid.RowComm()) == d_num_procs);
1365 El::Int height =
static_cast<El::Int
>(
numColumns());
1366 El::Int width =
static_cast<El::Int
>(
numRows());
1367 El::Int root =
static_cast<El::Int
>(master_rank);
1368 El::DistMatrix<double> scratch(height, width, grid, root);
1381 CAROM_VERIFY(scratch.RowOwner(0) == scratch.RowOwner(1));
1383 CAROM_VERIFY(MPI_Comm_rank(comm, &my_rank) == MPI_SUCCESS);
1384 El::Int rank_as_row =
static_cast<El::Int
>(my_rank);
1385 CAROM_VERIFY(scratch.ColOwner(rank_as_row) == rank_as_row);
1388 El::DistMatrix<double> householder_scalars(grid);
1389 El::DistMatrix<double> diagonal(grid);
1390 El::DistPermutation perm(grid);
1391 El::QRCtrl<double> ctrl;
1418 for (
int row = 0; row < d_num_rows; row++) {
1419 El::Int el_loc_col =
static_cast<El::Int
>(row);
1420 El::Int el_global_col = scratch.GlobalCol(el_loc_col);
1421 for (
int col = 0; col < d_num_cols; col++) {
1422 El::Int el_loc_row =
static_cast<El::Int
>(col);
1423 El::Int el_global_row = scratch.GlobalRow(el_loc_row);
1424 scratch.Set(el_global_row, el_global_col, this->
item(row, col));
1430 El::QR(scratch, householder_scalars, diagonal, perm);
1434 for (
size_t i = 0; i < pivots_requested; i++) {
1435 El::Int el_i =
static_cast<El::Int
>(i);
1436 El::Int el_perm_i = perm.Image(el_i);
1441 El::Int el_loc_i = scratch.LocalCol(el_perm_i);
1442 int loc_i =
static_cast<int>(el_loc_i);
1443 row_pivot[i] = loc_i;
1449 El::Int el_owner = scratch.ColOwner(el_perm_i);
1450 int owner =
static_cast<int>(el_owner);
1451 row_pivot_owner[i] = owner;
1454 CAROM_VERIFY(
false);
1459 Matrix::qrcp_pivots_transpose_distributed_elemental_unbalanced
1460 (
int* row_pivot,
int* row_pivot_owner,
int pivots_requested)
1463 #ifdef CAROM_HAS_ELEMENTAL
1483 CAROM_VERIFY(row_pivot != NULL);
1484 CAROM_VERIFY(row_pivot_owner != NULL);
1487 const MPI_Comm comm = MPI_COMM_WORLD;
1488 const int master_rank = 0;
1490 int num_total_rows = d_num_rows;
1491 const int reduce_count = 1;
1492 CAROM_VERIFY(MPI_Allreduce(MPI_IN_PLACE,
1497 comm) == MPI_SUCCESS);
1501 CAROM_VERIFY(pivots_requested <= num_total_rows);
1502 CAROM_VERIFY(pivots_requested > 0);
1510 const El::Grid grid(comm, 1);
1514 CAROM_VERIFY(El::mpi::Size(grid.RowComm()) == d_num_procs);
1519 El::Int height =
static_cast<El::Int
>(
numColumns());
1520 El::Int width =
static_cast<El::Int
>(
numRows());
1521 El::Int root =
static_cast<El::Int
>(master_rank);
1522 El::DistMatrix<double> scratch(height, width, grid, root);
1535 CAROM_VERIFY(scratch.RowOwner(0) == scratch.RowOwner(1));
1537 CAROM_VERIFY(MPI_Comm_rank(comm, &my_rank) == MPI_SUCCESS);
1538 El::Int rank_as_row =
static_cast<El::Int
>(my_rank);
1539 CAROM_VERIFY(scratch.ColOwner(rank_as_row) == rank_as_row);
1542 El::DistMatrix<double> householder_scalars(grid);
1543 El::DistMatrix<double> diagonal(grid);
1544 El::DistPermutation perm(grid);
1545 El::QRCtrl<double> ctrl;
1562 int *row_offset =
new int[d_num_procs + 1];
1563 row_offset[d_num_procs + 1] = num_total_rows;
1565 row_offset[my_rank] = d_num_rows;
1566 const int send_recv_count = 1;
1567 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE,
1573 comm) == MPI_SUCCESS);
1575 for (
int i = d_num_procs - 1; i >= 0; i--) {
1576 row_offset[i] = row_offset[i + 1] - row_offset[i];
1578 CAROM_VERIFY(row_offset[0] == 0);
1582 for (
int row = row_offset[my_rank]; row < row_offset[my_rank + 1]; row++) {
1583 El::Int el_loc_col =
static_cast<El::Int
>(row - row_offset[my_rank]);
1584 El::Int el_global_col = scratch.GlobalCol(el_loc_col);
1585 for (
int col = 0; col < d_num_cols; col++) {
1586 El::Int el_loc_row =
static_cast<El::Int
>(col);
1587 El::Int el_global_row = scratch.GlobalRow(el_loc_row);
1588 scratch.Set(el_global_row, el_global_col, this->
item(row, col));
1594 El::QR(scratch, householder_scalars, diagonal, perm);
1598 for (
size_t i = 0; i < pivots_requested; i++) {
1599 El::Int el_i =
static_cast<El::Int
>(i);
1600 El::Int el_perm_i = perm.Image(el_i);
1606 int global_row_index =
static_cast<int>(el_perm_i);
1608 for (rank = 0; rank < d_num_procs; rank++) {
1609 bool is_at_or_above_lower_bound = (global_row_index >= row_offset[rank]);
1610 bool is_below_upper_bound = (global_row_index < row_offset[rank + 1]);
1611 if (is_at_or_above_lower_bound && is_below_upper_bound) {
1612 row_pivot_owner[i] = rank;
1620 row_pivot[i] = global_row_index - row_offset[rank];
1624 delete [] row_offset;
1626 CAROM_VERIFY(
false);
1633 int const num_passes = double_pass ? 2 : 1;
1635 for (
int work = 0; work < d_num_cols; ++work)
1638 for (
int k = 0; k < num_passes; k++)
1640 for (
int col = 0; col < work; ++col)
1642 double factor = 0.0;
1644 for (
int i = 0; i < d_num_rows; ++i)
1645 factor +=
item(i, col) *
item(i, work);
1647 if (d_distributed && d_num_procs > 1)
1649 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &factor, 1,
1650 MPI_DOUBLE, MPI_SUM,
1654 for (
int i = 0; i < d_num_rows; ++i)
1655 item(i, work) -= factor *
item(i, col);
1662 for (
int i = 0; i < d_num_rows; ++i)
1663 norm +=
item(i, work) *
item(i, work);
1665 if (d_distributed && d_num_procs > 1)
1667 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &norm, 1, MPI_DOUBLE,
1668 MPI_SUM, MPI_COMM_WORLD)
1671 if (norm > zero_tol)
1673 norm = 1.0 / sqrt(norm);
1674 for (
int i = 0; i < d_num_rows; ++i)
1675 item(i, work) *= norm;
1683 if (ncols == -1) ncols = d_num_cols;
1684 CAROM_VERIFY((ncols > 0) && (ncols <= d_num_cols));
1686 const int last_col = ncols - 1;
1688 int const num_passes = double_pass ? 2 : 1;
1691 for (
int k = 0; k < num_passes; k++)
1693 for (
int col = 0; col < last_col; ++col)
1695 double factor = 0.0;
1697 for (
int i = 0; i < d_num_rows; ++i)
1698 factor +=
item(i, col) *
item(i, last_col);
1700 if (d_distributed && d_num_procs > 1)
1702 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &factor, 1, MPI_DOUBLE,
1703 MPI_SUM, MPI_COMM_WORLD)
1706 for (
int i = 0; i < d_num_rows; ++i)
1707 item(i, last_col) -= factor *
item(i, col);
1714 for (
int i = 0; i < d_num_rows; ++i)
1715 norm +=
item(i, last_col) *
item(i, last_col);
1717 if (d_distributed && d_num_procs > 1)
1719 CAROM_VERIFY( MPI_Allreduce(MPI_IN_PLACE, &norm, 1, MPI_DOUBLE,
1720 MPI_SUM, MPI_COMM_WORLD) == MPI_SUCCESS );
1722 if (norm > zero_tol)
1724 norm = 1.0 / sqrt(norm);
1725 for (
int i = 0; i < d_num_rows; ++i)
1726 item(i, last_col) *= norm;
1738 for (
int i = 0; i < d_num_rows; i++)
1741 double row_max = fabs(
item(i, 0));
1742 for (
int j = 1; j < d_num_cols; j++)
1744 if (fabs(
item(i, j)) > row_max)
1745 row_max = fabs(
item(i, j));
1749 if (row_max > 1.0e-14)
1751 for (
int j = 0; j < d_num_cols; j++)
1752 item(i, j) /= row_max;
1765 double local_max[d_num_cols];
1766 for (
int j = 0; j < d_num_cols; j++)
1768 local_max[j] = fabs(
item(0, j));
1769 for (
int i = 1; i < d_num_rows; i++)
1771 if (fabs(
item(i, j)) > local_max[j])
1772 local_max[j] = fabs(
item(i, j));
1777 double global_max[d_num_cols];
1778 if (d_distributed && d_num_procs > 1)
1780 MPI_Allreduce(&local_max, &global_max, d_num_cols, MPI_DOUBLE, MPI_MAX,
1785 for (
int i = 0; i < d_num_cols; i++)
1786 global_max[i] = local_max[i];
1790 for (
int j = 0; j < d_num_cols; j++)
1792 if (global_max[j] > 1.0e-14)
1794 double tmp = 1.0 / global_max[j];
1795 for (
int i = 0; i < d_num_rows; i++)
1811 int result_num_rows = v.
dim();
1812 int result_num_cols;
1823 result_num_cols = w.
dim();
1833 int process_local_num_cols = w.
dim();
1834 MPI_Comm comm = MPI_COMM_WORLD;
1835 MPI_Datatype num_cols_datatype = MPI_INT;
1837 MPI_Comm_size(comm, &num_procs);
1838 std::vector<int> num_cols_on_proc(num_procs, 0);
1839 int num_procs_send_count = 1;
1840 int num_procs_recv_count = 1;
1841 MPI_Allgather(&process_local_num_cols,
1842 num_procs_send_count,
1844 num_cols_on_proc.data(),
1845 num_procs_recv_count,
1851 std::vector<int> gathered_w_displacements(num_procs, 0);
1852 result_num_cols = 0;
1853 for (
int i = 0; i < num_cols_on_proc.size(); i++)
1855 gathered_w_displacements.at(i) = result_num_cols;
1856 result_num_cols += num_cols_on_proc.at(i);
1862 std::vector<double> w_data(process_local_num_cols);
1863 for (
int i = 0; i < w_data.size(); i++)
1865 w_data.at(i) = w(i);
1868 std::vector<double> gathered_w_data(result_num_cols);
1869 MPI_Datatype entry_datatype = MPI_DOUBLE;
1870 MPI_Allgatherv(w_data.data(),
1871 process_local_num_cols,
1873 gathered_w_data.data(),
1874 num_cols_on_proc.data(),
1875 gathered_w_displacements.data(),
1879 gathered_w.
setSize(result_num_cols);
1880 for (
int i = 0; i < gathered_w_data.size(); i++)
1882 gathered_w(i) = gathered_w_data.at(i);
1887 Matrix result(result_num_rows, result_num_cols, is_distributed);
1890 for (
int i = 0; i < result_num_rows; i++)
1892 for (
int j = 0; j < result_num_cols; j++)
1894 result(i, j) = v(i) * gathered_w(j);
1903 const int resultNumRows = v.
dim();
1904 int resultNumColumns, processRowStartIndex, processNumColumns;
1908 if (
false == isDistributed)
1910 resultNumColumns = processNumColumns = resultNumRows;
1911 processRowStartIndex = 0;
1915 using sizeType = std::vector<int>::size_type;
1922 const MPI_Comm comm = MPI_COMM_WORLD;
1924 MPI_Comm_size(comm, &numProcesses);
1926 numRowsOnProcess(
static_cast<sizeType
>(numProcesses));
1928 const MPI_Datatype indexType = MPI_INT;
1929 MPI_Allgather(&resultNumRows, one, indexType,
1930 numRowsOnProcess.data(), one, indexType, comm);
1943 std::vector<int> rowStartIndexOnProcess(numProcesses);
1944 resultNumColumns = 0;
1945 for (sizeType i = 0; i < rowStartIndexOnProcess.size(); i++)
1947 rowStartIndexOnProcess.at(i) = resultNumColumns;
1948 resultNumColumns += numRowsOnProcess.at(i);
1951 MPI_Comm_rank(comm, &processNumber);
1952 processRowStartIndex =
1953 rowStartIndexOnProcess.at(
static_cast<sizeType
>(processNumber));
1960 Matrix diagonalMatrix(resultNumRows, resultNumColumns, isDistributed);
1961 for (
int i = 0; i < resultNumRows; i++)
1963 for (
int j = 0; j < resultNumColumns; j++)
1969 const double entry = (j == (i + processRowStartIndex)) ? v(i) : 0.0;
1970 diagonalMatrix(i, j) = entry;
1974 return diagonalMatrix;
1986 char jobz =
'V', uplo =
'U';
1990 int lwork = std::max(1, 10*k-1);
1991 double* work =
new double [lwork];
1992 double* eigs =
new double [k];
1997 for (
int row = 0; row < k; ++row) {
1998 for (
int col = row+1; col < k; ++col) {
1999 double tmp = ev->
item(row, col);
2000 ev->
item(row, col) = ev->
item(col, row);
2001 ev->
item(col, row) = tmp;
2006 dsyev(&jobz, &uplo, &k, ev->
getData(), &k, eigs, work, &lwork, &info);
2010 for (
int row = 0; row < k; ++row) {
2011 for (
int col = row+1; col < k; ++col) {
2012 double tmp = ev->
item(row, col);
2013 ev->
item(row, col) = ev->
item(col, row);
2014 ev->
item(col, row) = tmp;
2020 for (
int i = 0; i < k; i++)
2022 eigenpair.
eigs.push_back(eigs[i]);
2033 char jobvl =
'N', jobrl =
'V';
2037 int lwork = std::max(k*k, 10*k);
2038 double* work =
new double [lwork];
2039 double* e_real =
new double [k];
2040 double* e_imaginary =
new double [k];
2041 double* ev_l = NULL;
2047 for (
int row = 0; row < k; ++row) {
2048 for (
int col = row+1; col < k; ++col) {
2049 double tmp = A_copy->
item(row, col);
2050 A_copy->
item(row, col) = A_copy->
item(col, row);
2051 A_copy->
item(col, row) = tmp;
2056 dgeev(&jobvl, &jobrl, &k, A_copy->
getData(), &k, e_real, e_imaginary, ev_l,
2057 &k, ev_r->
getData(), &k, work, &lwork, &info);
2061 for (
int row = 0; row < k; ++row) {
2062 for (
int col = row+1; col < k; ++col) {
2063 double tmp = ev_r->
item(row, col);
2064 ev_r->
item(row, col) = ev_r->
item(col, row);
2065 ev_r->
item(col, row) = tmp;
2074 for (
int i = 0; i < k; ++i)
2076 for (
int row = 0; row < k; ++row) {
2079 if (e_imaginary[i] != 0)
2081 for (
int row = 0; row < k; ++row) {
2093 for (
int i = 0; i < k; i++)
2095 eigenpair.
eigs.push_back(std::complex<double>(e_real[i], e_imaginary[i]));
2100 delete [] e_imaginary;
2119 U =
new Matrix(m, std::min(m, n),
false);
2124 U->
setSize(m, std::min(m, n));
2129 V =
new Matrix(std::min(m, n), n,
false);
2133 V->
setSize(std::min(m, n), n);
2138 S =
new Vector(n,
false);
2149 int mn = std::min(m, n);
2150 int lwork = 4 * mn * mn + 7 * mn;
2151 double* work =
new double [lwork];
2152 int iwork[8*std::min(m, n)];
2157 &ldv, work, &lwork, iwork, &info);
2159 CAROM_VERIFY(info == 0);
2165 SerialSVDDecomposition
SerialSVD(Matrix* A)
2167 CAROM_VERIFY(!A->distributed());
2174 SerialSVDDecomposition decomp;
2184 std::unique_ptr<Matrix> SpaceTimeProduct(
const CAROM::Matrix & As,
2187 const std::vector<double> *tscale,
2188 const bool At0at0,
const bool Bt0at0,
const bool lagB,
2194 const int AtOS = At0at0 ? 1 : 0;
2195 const int BtOS0 = Bt0at0 ? 1 : 0;
2196 const int BtOS = BtOS0 + (lagB ? 1 : 0);
2201 const int nspace = As.
numRows();
2202 const int ntime = At.
numRows() + AtOS;
2204 CAROM_VERIFY(nspace == Bs.
numRows() && ntime == Bt.
numRows() + BtOS0);
2210 const int k0AB = std::max(AtOS, BtOS);
2211 const int k00 = skip0 ? 1 : 0;
2212 const int k0 = std::max(k0AB, k00);
2214 CAROM_VERIFY(tscale == NULL || (tscale->size() == ntime-1 && k0 > 0));
2218 for (
int i=0; i<nrows; ++i)
2220 for (
int j=0; j<ncols; ++j)
2224 for (
int k=k0; k<ntime; ++k)
2228 const double At_k = At.
item(k - AtOS,i);
2229 const double Bt_k = Bt.
item(k - BtOS,j);
2230 const double ts = (tscale == NULL) ? 1.0 : (*tscale)[k - 1];
2233 for (
int m=0; m<nspace; ++m)
2234 spij += As.
item(m,i) * Bs.
item(m,j);
2236 pij += spij * ts * At_k * Bt_k;
2239 p->item(i, j) = pij;
2243 return std::unique_ptr<Matrix>(p);
void getInteger(const std::string &key, int &data)
Reads an integer associated with the supplied key from the currently open database file.
void putInteger(const std::string &key, int data)
Writes an integer associated with the supplied key to currently open database file.
virtual void getDoubleArray(const std::string &key, double *data, int nelements, const bool distributed=false) override
Reads an array of doubles associated with the supplied key from the currently open HDF5 database file...
virtual bool create(const std::string &file_name, const MPI_Comm comm=MPI_COMM_NULL) override
Creates a new HDF5 database file with the supplied name.
virtual bool open(const std::string &file_name, const std::string &type, const MPI_Comm comm=MPI_COMM_NULL) override
Opens an existing HDF5 database file with the supplied name.
virtual void putDoubleArray(const std::string &key, const double *const data, int nelements, const bool distributed=false) override
Writes an array of doubles associated with the supplied key to the currently open HDF5 database file.
virtual bool close() override
Closes the currently open HDF5 database file.
void local_read(const std::string &base_file_name, int rank)
read a single rank of a distributed Matrix into (a) HDF file(s).
void setSize(int num_rows, int num_cols)
Sets the number of rows and columns of the matrix and reallocates storage if needed....
double * getData() const
Get the matrix data as a pointer.
bool balanced() const
Returns true if rows of matrix are load-balanced.
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
void rescale_rows_max()
Rescale every matrix row by its maximum absolute value.
Matrix & operator=(const Matrix &rhs)
Assignment operator.
void write(const std::string &base_file_name) const
write Matrix into (a) HDF file(s).
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
void distribute(const int &local_num_rows)
Distribute this matrix rows among MPI processes, based on the specified local number of rows....
void gather()
Gather all the distributed rows among MPI processes. This becomes not distributed after this function...
void orthogonalize_last(int ncols=-1, bool double_pass=false, double zero_tol=1.0e-15)
Orthonormalizes the matrix's last column, assuming the previous columns are already orthonormal.
std::unique_ptr< Matrix > qr_factorize() const
Computes and returns the Q of the QR factorization of this.
Matrix & operator+=(const Matrix &rhs)
Addition operator.
int numDistributedRows() const
Returns the number of rows of the Matrix across all processors.
Matrix & operator-=(const Matrix &rhs)
Subtraction operator.
std::unique_ptr< Vector > getColumn(int column) const
Returns a deep copy of a column of the matrix.
void orthogonalize(bool double_pass=false, double zero_tol=1.0e-15)
Orthonormalizes the matrix.
std::unique_ptr< Matrix > inverse() const
Computes and returns the inverse of this.
void multPlus(Vector &a, const Vector &b, double c) const
Computes a += this*b*c.
void print(const char *prefix) const
print Matrix into (a) ascii file(s).
std::unique_ptr< Matrix > getFirstNColumns(int n) const
Get the first N columns of a matrix.
std::unique_ptr< Matrix > elementwise_mult(const Matrix &other) const
Multiplies two matrices element-wise.
void pointwise_mult(int this_row, const Vector &other, Vector &result) const
Multiplies a specified row of this Matrix with other pointwise.
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
int numRows() const
Returns the number of rows of the Matrix on this processor.
void read(const std::string &base_file_name)
read Matrix into (a) HDF file(s).
std::unique_ptr< Matrix > mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product.
void transpose()
Replaces this Matrix with its transpose (in place), in the serial square case only.
std::unique_ptr< Matrix > elementwise_square() const
Square every element in the matrix.
std::unique_ptr< Matrix > transposeMult(const Matrix &other) const
Multiplies the transpose of this Matrix with other and returns the product.
void rescale_cols_max()
Rescale every matrix column by its maximum absolute value.
void qrcp_pivots_transpose(int *row_pivot, int *row_pivot_owner, int pivots_requested) const
Compute the leading numColumns() column pivots from a QR decomposition with column pivots (QRCP) of t...
double * getData() const
Get the vector data as a pointer.
void setSize(int dim)
Sets the length of the vector and reallocates storage if needed. All values are initialized to zero.
bool distributed() const
Returns true if the Vector is distributed.
const double & item(int i) const
Const Vector member access.
int dim() const
Returns the dimension of the Vector on this processor.
ComplexEigenPair NonSymmetricRightEigenSolve(const Matrix &A)
Computes the eigenvectors/eigenvalues of an NxN real nonsymmetric matrix.
Matrix IdentityMatrixFactory(const Vector &v)
Factory function to make an identity matrix with rows distributed in the same fashion as its Vector a...
Matrix outerProduct(const Vector &v, const Vector &w)
Computes the outer product of two Vectors, v and w.
void SerialSVD(Matrix *A, Matrix *U, Vector *S, Matrix *V)
Computes the SVD of a undistributed matrix in serial.
Matrix DiagonalMatrixFactory(const Vector &v)
Factory function to make a diagonal matrix with nonzero entries as in its Vector argument....
int get_global_offsets(const int local_dim, std::vector< int > &offsets, const MPI_Comm &comm)
Save integer offsets for each MPI rank under MPI communicator comm, where each rank as the size of lo...
EigenPair SymmetricRightEigenSolve(const Matrix &A)
Computes the eigenvectors/eigenvalues of an NxN real symmetric matrix.
Matrix * ev_real
The real component of the eigenvectors in matrix form.
std::vector< std::complex< double > > eigs
The corresponding complex eigenvalues.
Matrix * ev_imaginary
The imaginary component of the eigenvectors in matrix form.
std::vector< double > eigs
The corresponding eigenvalues.
Matrix * ev
The eigenvectors in matrix form.