Install BipartiteMatchings headers for SuperLU_DIST. Removes global variables and code related to performance measurement that is not useful when used in a library setting. --- a/BipartiteMatchings/ApproxWeightPerfectMatching.h +++ b/BipartiteMatchings/ApproxWeightPerfectMatching.h @@ -9,7 +9,7 @@ #ifndef ApproxWeightPerfectMatching_h #define ApproxWeightPerfectMatching_h -#include "../CombBLAS.h" +#include "CombBLAS.h" #include "BPMaximalMatching.h" #include "BPMaximumMatching.h" #include @@ -39,9 +39,6 @@ std::shared_ptr commGrid; }; -double t1Comp, t1Comm, t2Comp, t2Comm, t3Comp, t3Comm, t4Comp, t4Comm, t5Comp, t5Comm, tUpdateMateComp; - - template std::vector> ExchangeData(std::vector>> & tempTuples, MPI_Comm World) { @@ -391,7 +388,7 @@ -int ThreadBuffLenForBinning(int itemsize, int nbins) +inline int ThreadBuffLenForBinning(int itemsize, int nbins) { // 1MB shared cache (per 2 cores) in KNL #ifndef L2_CACHE_SIZE @@ -417,7 +414,6 @@ - double tstart = MPI_Wtime(); MPI_Comm World = param.commGrid->GetWorld(); @@ -528,9 +524,6 @@ } } - t1Comp = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); - // Step 3: Communicate data std::vector recvcnt (param.nprocs); @@ -548,7 +541,6 @@ std::vector< std::tuple > recvTuples1(totrecv); MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World); MPI_Type_free(&MPI_tuple); - t1Comm = MPI_Wtime() - tstart; return recvTuples1; } @@ -730,9 +722,6 @@ // Step 4: Communicate data - t2Comp = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); - std::vector recvcnt (param.nprocs); std::vector rdispls (param.nprocs, 0); @@ -748,7 +737,6 @@ std::vector< std::tuple > recvTuples1(totrecv); MPI_Alltoallv(sendTuples.data(), sendcnt.data(), sdispls.data(), MPI_tuple, recvTuples1.data(), recvcnt.data(), rdispls.data(), MPI_tuple, World); MPI_Type_free(&MPI_tuple); - t2Comm = MPI_Wtime() - tstart; return recvTuples1; } @@ -836,7 +824,6 @@ param.myrank = myrank; param.commGrid = commGrid; - double t1CompAll = 0, t1CommAll = 0, t2CompAll = 0, t2CommAll = 0, t3CompAll = 0, t3CommAll = 0, t4CompAll = 0, t4CommAll = 0, t5CompAll = 0, t5CommAll = 0, tUpdateMateCompAll = 0, tUpdateWeightAll = 0; // ----------------------------------------------------------- // replicate mate vectors for mateCol2Row @@ -975,11 +962,7 @@ } //vector< tuple >().swap(recvTuples1); - double t3Comp = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); recvTuples1 = ExchangeData1(tempTuples1, World); - double t3Comm = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); std::vector> bestTuplesPhase4 (lncol); // we could have used lnrow in both bestTuplesPhase3 and bestTuplesPhase4 @@ -1041,14 +1024,9 @@ //vector< tuple >().swap(recvTuples1); - double t4Comp = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); std::vector> recvWinnerTuples = ExchangeData1(winnerTuples, World); - double t4Comm = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); - // at the owner of (mj,j) std::vector> rowBcastTuples(recvWinnerTuples.size()); //(mi,mj) std::vector> colBcastTuples(recvWinnerTuples.size()); //(j,i) @@ -1065,15 +1043,10 @@ colBcastTuples[k] = std::make_tuple(j,i); rowBcastTuples[k] = std::make_tuple(mj,mi); } - double t5Comp = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); std::vector> updatedR2C = MateBcast(rowBcastTuples, RowWorld); std::vector> updatedC2R = MateBcast(colBcastTuples, ColWorld); - double t5Comm = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); - #ifdef THREADED #pragma omp parallel for #endif @@ -1095,13 +1068,9 @@ } - double tUpdateMateComp = MPI_Wtime() - tstart; - tstart = MPI_Wtime(); // update weights of matched edges // we can do better than this since we are doing sparse updates ReplicateMateWeights(param, dcsc, colptr, RepMateC2R, RepMateWR2C, RepMateWC2R); - double tUpdateWeight = MPI_Wtime() - tstart; - weightPrev = weightCur; weightCur = MatchingWeight(RepMateWC2R, RowWorld, minw); @@ -1110,32 +1079,8 @@ //UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R); //CheckMatching(mateRow2Col,mateCol2Row); - if(myrank==0) - { - std::cout << t1Comp << " " << t1Comm << " "<< t2Comp << " " << t2Comm << " " << t3Comp << " " << t3Comm << " " << t4Comp << " " << t4Comm << " " << t5Comp << " " << t5Comm << " " << tUpdateMateComp << " " << tUpdateWeight << std::endl; - - t1CompAll += t1Comp; - t1CommAll += t1Comm; - t2CompAll += t2Comp; - t2CommAll += t2Comm; - t3CompAll += t3Comp; - t3CommAll += t3Comm; - t4CompAll += t4Comp; - t4CommAll += t4Comm; - t5CompAll += t5Comp; - t5CommAll += t5Comm; - tUpdateMateCompAll += tUpdateMateComp; - tUpdateWeightAll += tUpdateWeight; - - } } - if(myrank==0) - { - std::cout << "=========== overal timing ==========" << std::endl; - std::cout << t1CompAll << " " << t1CommAll << " " << t2CompAll << " " << t2CommAll << " " << t3CompAll << " " << t3CommAll << " " << t4CompAll << " " << t4CommAll << " " << t5CompAll << " " << t5CommAll << " " << tUpdateMateCompAll << " " << tUpdateWeightAll << std::endl; - } - // update the distributed mate vectors from replicated mate vectors UpdateMatching(mateRow2Col, mateCol2Row, RepMateR2C, RepMateC2R); //weightCur = MatchingWeight(RepMateWC2R, RowWorld); --- a/BipartiteMatchings/BPMaximalMatching.h +++ b/BipartiteMatchings/BPMaximalMatching.h @@ -1,7 +1,7 @@ #ifndef BP_MAXIMAL_MATCHING_H #define BP_MAXIMAL_MATCHING_H -#include "../CombBLAS.h" +#include "CombBLAS.h" #include #include #include @@ -14,8 +14,6 @@ #define GREEDY 1 #define KARP_SIPSER 2 #define DMD 3 -MTRand GlobalMT(123); // for reproducible result -double tTotalMaximal; namespace combblas { @@ -25,7 +25,7 @@ void MaximalMatching(Par_DCSC_Bool & A, Par_DCSC_Bool & AT, FullyDistVec& mateRow2Col, FullyDistVec& mateCol2Row, FullyDistVec& degColRecv, int type, bool rand=true) { - + static MTRand GlobalMT(123); // for reproducible result typedef VertexTypeML < IT, IT> VertexType; int nprocs, myrank; MPI_Comm_size(MPI_COMM_WORLD,&nprocs); @@ -354,8 +354,6 @@ } - tTotalMaximal = MPI_Wtime() - tStart; - IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;}); std::vector totalTimes(timing[0].size(),0); for(int i=0; i #include #include @@ -11,7 +11,6 @@ #include #include #include "MatchingDefs.h" -double tTotalMaximum; namespace combblas { @@ -231,7 +231,7 @@ void maximumMatching(SpParMat < IT, NT, DER > & A, FullyDistVec& mateRow2Col, FullyDistVec& mateCol2Row, bool prune=true, bool randMM = false, bool maximizeWeight = false) { - + static MTRand GlobalMT(123); // for reproducible result typedef VertexTypeMM VertexType; int nthreads=1; @@ -420,8 +420,6 @@ MPI_Win_free(&winLeaves); - tTotalMaximum = MPI_Wtime() - tstart; - //isMaximalmatching(A, mateRow2Col, mateCol2Row, unmatchedRow, unmatchedCol); //isMatching(mateCol2Row, mateRow2Col); //todo there is a better way to check this --- a/BipartiteMatchings/MatchingDefs.h +++ b/BipartiteMatchings/MatchingDefs.h @@ -9,7 +9,7 @@ #ifndef MatchingDefs_h #define MatchingDefs_h -#include "../CombBLAS.h" +#include "CombBLAS.h" #include namespace combblas { --- a/BipartiteMatchings/Utility.h +++ b/BipartiteMatchings/Utility.h @@ -1,7 +1,7 @@ #ifndef BP_UTILITY_H #define BP_UTILITY_H -#include "../CombBLAS.h" +#include "CombBLAS.h" namespace combblas { --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,7 @@ set_property(TARGET CombBLAS PROPERTY VERSION ${CombBLAS_VERSION}) # installation install(DIRECTORY include/ DESTINATION include) install(DIRECTORY psort-1.0/include/ DESTINATION include) +install(DIRECTORY BipartiteMatchings DESTINATION include FILES_MATCHING PATTERN "*.h") install(TARGETS CombBLAS EXPORT CombBLASTargets LIBRARY DESTINATION lib ARCHIVE DESTINATION lib