Changeset 3296

Show
Ignore:
Timestamp:
11/06/09 12:37:19 (2 weeks ago)
Author:
tautges
Message:

A couple of improvements and bug fixes to the parallel ghost exchange code.

- add debug-only code to check for duplicate procs in proc list for entities
- add a little MPE stuff, commented out
- in build_shared_hps list, terminate list of procs/handles
- in a couple places, print error and return failure instead of asserting
- couple of improvements to print_buffer to make it more informative
- adding a pcomm_serial test, which does the equivalent of ghost exchange but completely in serial

Passes make check, except for iMeshP tests.

Location:
MOAB/trunk/parallel
Files:
1 added
3 modified

Legend:

Unmodified
Added
Removed
  • MOAB/trunk/parallel/MBParallelComm.cpp

    r3255 r3296  
    4848 
    4949#undef DEBUG_MPE 
     50//#define DEBUG_MPE 1 
    5051#ifdef DEBUG_MPE 
    5152#include "mpe.h" 
     
    5455int SHAREDV_START, SHAREDV_END; 
    5556int RESOLVE_START, RESOLVE_END; 
     57int ENTITIES_START, ENTITIES_END; 
     58int RHANDLES_START, RHANDLES_END; 
     59 
    5660#endif 
    5761#undef DEBUG_COMM 
     62//#define DEBUG_COMM 1 
    5863#undef DEBUG_PACKING 
    5964#undef DEBUG_MSGS 
     
    489494                        MPI_UNSIGNED_CHAR, to_proc,  
    490495                        mesg_tag+1, procConfig.proc_comm(), &send_req2); 
     496      // put this inside so we can stop on completion in the debugger 
    491497    if (success != MPI_SUCCESS) return MB_FAILURE; 
    492498  } 
     
    868874      PACK_INTS(buff_ptr, tmp_procs, num_ents); 
    869875      PACK_EH(buff_ptr, tmp_handles, num_ents); 
     876 
     877#ifndef NDEBUG 
     878        // check for duplicates in proc list 
     879      unsigned int dp = 0; 
     880      for (; dp < MAX_SHARING_PROCS && -1 != tmp_procs[dp]; dp++) 
     881        dumprocs.insert(tmp_procs[dp]); 
     882      assert(dumprocs.size() == dp); 
     883      dumprocs.clear(); 
     884#endif       
    870885    } 
    871886  } 
     
    10161031  } 
    10171032 
     1033    // put -1 after procs and 0 after handles 
     1034  if (MAX_SHARING_PROCS > num_ents) { 
     1035    tmp_procs[num_ents] = -1; 
     1036    tmp_handles[num_ents] = 0; 
     1037  } 
     1038   
    10181039  return MB_SUCCESS; 
    10191040} 
     
    13121333    for (i = 0; i < num_ents; i++) { 
    13131334      UNPACK_INT(buff_ptr, j); 
    1314       assert(j >= 0 && "Should be non-negative # proc/handles."); 
     1335      if (j < 0) { 
     1336        std::cout << "Should be non-negative # proc/handles."; 
     1337        return MB_FAILURE; 
     1338      } 
     1339       
    13151340      buff_ptr += j * (sizeof(int)+sizeof(MBEntityHandle)); 
    13161341    } 
     
    13551380          // pointers to other procs/handles 
    13561381        UNPACK_INT(buff_save, num_ps); 
    1357         assert("Shouldn't ever be fewer than 1 procs here." && 0 < num_ps); 
     1382        if (0 >= num_ps) { 
     1383          std::cout << "Shouldn't ever be fewer than 1 procs here." << std::endl; 
     1384          return MB_FAILURE; 
     1385        } 
     1386         
    13581387        UNPACK_INTS(buff_save, &ps[0], num_ps); 
    13591388        UNPACK_EH(buff_save, &hs[0], num_ps); 
     
    14411470                                     (created_here ? (PSTATUS_GHOST | PSTATUS_NOT_OWNED) : 0))); 
    14421471        RRA(""); 
    1443        
     1472 
    14441473          // need to send this new handle to all sharing procs 
    14451474        if (!is_iface) { 
     
    15071536} 
    15081537 
    1509 MBErrorCode MBParallelComm::print_buffer(unsigned char *buff_ptr, int mesg_tag,  
     1538MBErrorCode MBParallelComm::print_buffer(unsigned char *buff_ptr,  
     1539                                         int mesg_tag,  
    15101540                                         int from_proc, bool sent)  
    15111541{ 
     
    15161546            << " to/from proc " << from_proc << "; contents:" << std::endl; 
    15171547 
     1548  int msg_length; 
     1549  unsigned char *orig_ptr = buff_ptr; 
     1550  UNPACK_INT(buff_ptr, msg_length); 
     1551  std::cout << msg_length << " bytes..." << std::endl; 
     1552 
    15181553  if (MB_MESG_ENTS == mesg_tag) { 
    1519     int total_size; 
    1520     UNPACK_INT(buff_ptr, total_size); 
    1521     std::cout << total_size << " entities..." << std::endl; 
    15221554 
    15231555      // 1. # entities = E 
     
    15331565    for (i = 0; i < num_ents; i++) { 
    15341566      UNPACK_INT(buff_ptr, j); 
     1567      if (0 > j) return MB_FAILURE; 
    15351568      ps.resize(j); 
    15361569      hs.resize(j); 
    1537       std::cout << "Entity " << i << ": # procs = " << j << std::endl; 
     1570      std::cout << "Entity " << i << ", # procs = " << j << std::endl; 
    15381571      UNPACK_INTS(buff_ptr, &ps[0], j); 
    15391572      UNPACK_EH(buff_ptr, &hs[0], j); 
     
    15441577      for (k = 0; k < j; k++) std::cout << hs[k] << " "; 
    15451578      std::cout << std::endl; 
    1546     } 
    1547    
    1548   while (true) { 
    1549     MBEntityType this_type = MBMAXTYPE; 
    1550     UNPACK_TYPE(buff_ptr, this_type); 
    1551     assert(this_type != MBENTITYSET); 
     1579 
     1580      if (buff_ptr-orig_ptr > msg_length) { 
     1581        std::cout << "End of buffer..." << std::endl; 
     1582        return MB_FAILURE; 
     1583      } 
     1584    } 
     1585   
     1586    while (true) { 
     1587      MBEntityType this_type = MBMAXTYPE; 
     1588      UNPACK_TYPE(buff_ptr, this_type); 
     1589      assert(this_type != MBENTITYSET); 
    15521590 
    15531591        // MBMAXTYPE signifies end of entities data 
     
    15881626          std::cout << std::endl; 
    15891627        } 
     1628 
     1629        if (buff_ptr-orig_ptr > msg_length) { 
     1630          std::cout << "End of buffer..." << std::endl; 
     1631          return MB_FAILURE; 
     1632        } 
    15901633      } 
    15911634    } 
     
    15931636   
    15941637  else if (MB_MESG_REMOTE_HANDLES) { 
     1638    int num_bytes; 
     1639    UNPACK_INT(buff_ptr, num_bytes); 
     1640    std::cout << num_bytes << " bytes..." << std::endl; 
    15951641    int num_ents; 
    15961642    UNPACK_INT(buff_ptr, num_ents); 
     
    16071653                << L1p[i] << std::endl; 
    16081654    } 
     1655 
     1656    if (buff_ptr-orig_ptr > msg_length) { 
     1657      std::cout << "End of buffer..." << std::endl; 
     1658      return MB_FAILURE; 
     1659    } 
     1660 
    16091661  } 
    16101662  else if (MB_MESG_TAGS) { 
     
    17061758  RRA(""); 
    17071759   
     1760#ifndef NDEBUG 
     1761  { 
     1762      // check for duplicates in proc list 
     1763    std::set<unsigned int> dumprocs; 
     1764    unsigned int dp = 0; 
     1765    for (; (int) dp < num_ps && -1 != ps[dp]; dp++) 
     1766      dumprocs.insert(ps[dp]); 
     1767    assert(dp == dumprocs.size()); 
     1768  } 
     1769#endif       
     1770 
    17081771    // add any new sharing data 
    17091772  bool changed = false; 
     
    18121875    RRA("Couldn't set sharedhs tag."); 
    18131876    pstat |= (PSTATUS_MULTISHARED | PSTATUS_SHARED); 
     1877 
     1878#ifndef NDEBUG 
     1879    { 
     1880        // check for duplicates in proc list 
     1881      std::set<unsigned int> dumprocs; 
     1882      unsigned int dp = 0; 
     1883      for (; dp < num_exist && -1 != tag_ps[dp]; dp++) 
     1884        dumprocs.insert(tag_ps[dp]); 
     1885      assert(dp == dumprocs.size()); 
     1886    } 
     1887#endif       
    18141888  } 
    18151889  else if (num_exist == 2 || num_exist == 1) { 
    18161890    if (tag_ps[0] == (int) procConfig.proc_rank()) { 
    1817       assert(2 == num_exist); 
     1891      assert(2 == num_exist && tag_ps[1] != (int) procConfig.proc_rank()); 
    18181892      tag_ps[0] = tag_ps[1]; 
    18191893      tag_hs[0] = tag_hs[1]; 
     
    29313005  MPE_Log_get_state_eventIDs( &SHAREDV_START, &SHAREDV_END); 
    29323006  MPE_Log_get_state_eventIDs( &RESOLVE_START, &RESOLVE_END); 
     3007  MPE_Log_get_state_eventIDs( &ENTITIES_START, &ENTITIES_END); 
     3008  MPE_Log_get_state_eventIDs( &RHANDLES_START, &RHANDLES_END); 
    29333009  success = MPE_Describe_state(IFACE_START, IFACE_END, "Resolve interface ents", "green"); 
    29343010  success = MPE_Describe_state(GHOST_START, GHOST_END, "Exchange ghost ents", "red"); 
    29353011  success = MPE_Describe_state(SHAREDV_START, SHAREDV_END, "Resolve interface vertices", "blue"); 
    29363012  success = MPE_Describe_state(RESOLVE_START, RESOLVE_END, "Resolve shared ents", "purple"); 
     3013  success = MPE_Describe_state(ENTITIES_START, ENTITIES_END, "Exchange shared ents", "yellow"); 
     3014  success = MPE_Describe_state(RHANDLES_START, RHANDLES_END, "Remote handles", "cyan"); 
    29373015#endif 
    29383016} 
     
    37093787    // post ghost irecv's for ghost entities from all communicating procs 
    37103788    //=========================================== 
     3789#ifdef DEBUG_MPE 
     3790    MPE_Log_event(ENTITIES_START, procConfig.proc_rank(), "Starting entity exchange."); 
     3791#endif 
    37113792    // index reqs the same as buffer/sharing procs indices 
    37123793  std::vector<MPI_Request> recv_reqs(buffProcs.size(), MPI_REQUEST_NULL); 
     
    37643845                         sendReqs[ind], sendReqs[ind+buffProcs.size()]); 
    37653846    RRA("Failed to Isend in ghost exchange."); 
     3847     
     3848//    if (1 == num_layers)  
     3849//      print_buffer(&ownerSBuffs[ind][0], MB_MESG_ENTS, *proc_it, true); 
    37663850  } 
    37673851 
     
    37953879       
    37963880      std::cerr << "Received from " << status[0].MPI_SOURCE 
    3797                 << ": count = " << this_count << ", tag = " << status[0].MPI_TAG; 
     3881                << ", count = " << this_count << ", tag = " << status[0].MPI_TAG; 
    37983882      if (MB_MESG_ENTS+1 == status[0].MPI_TAG) std::cerr << " (second)"; 
    37993883      std::cerr << std::endl; 
     
    38113895     
    38123896    if (done) { 
     3897#ifdef DEBUG_MSGS 
     3898      print_buffer(&ghostRBuffs[ind][0], MB_MESG_ENTS, buffProcs[ind], false); 
     3899#endif   
    38133900      unsigned char *buff_ptr = &ghostRBuffs[ind][sizeof(int)]; 
    3814 #ifdef DEBUG_MSGS 
    3815       print_buffer(buff_ptr-sizeof(int), MB_MESG_ENTS, buffProcs[ind], false); 
    3816 #endif   
    38173901      result = unpack_entities(buff_ptr, 
    38183902                               store_remote_handles, ind, is_iface, 
    38193903                               L1hloc, L1hrem, L1p, L2hloc, L2hrem, L2p, new_ents); 
    3820       RRA("Failed to unpack entities."); 
     3904      if (MB_SUCCESS != result) { 
     3905        std::cout << "Failed to unpack entities.  Buffer contents:" << std::endl; 
     3906        print_buffer(&ghostRBuffs[ind][0], MB_MESG_ENTS, buffProcs[ind], false); 
     3907        return result; 
     3908      } 
    38213909 
    38223910      if (recv_reqs.size() != buffProcs.size()) { 
     
    38353923  } 
    38363924     
     3925#ifdef DEBUG_MPE 
     3926    MPE_Log_event(ENTITIES_END, procConfig.proc_rank(), "Ending entity exchange."); 
     3927#endif 
     3928 
    38373929  if (is_iface) { 
    38383930      // need to check over entities I sent and make sure I received  
     
    38803972    // post recvs for remote handles of my sent ents 
    38813973    //=========================================== 
     3974#ifdef DEBUG_MPE 
     3975    MPE_Log_event(RHANDLES_START, procConfig.proc_rank(), "Starting remote handles."); 
     3976#endif 
    38823977  for (ind = 0, proc_it = buffProcs.begin();  
    38833978       proc_it != buffProcs.end(); proc_it++, ind++) { 
     
    39424037       
    39434038      std::cerr << "Received from " << status[0].MPI_SOURCE 
    3944                 << ": count = " << this_count << ", tag = " << status[0].MPI_TAG; 
     4039                << ", count = " << this_count << ", tag = " << status[0].MPI_TAG; 
    39454040      if (MB_MESG_REMOTE_HANDLES_SECOND == status[0].MPI_TAG)  
    39464041        std::cerr << " (second)"; 
     
    39554050    if (done) { 
    39564051        // incoming remote handles 
     4052#ifdef DEBUG_MSGS 
     4053      print_buffer(&ghostRBuffs[ind][0], MB_MESG_REMOTE_HANDLES, buffProcs[ind], false); 
     4054#endif   
    39574055      buff_ptr = &ghostRBuffs[ind][sizeof(int)]; 
    3958 #ifdef DEBUG_MSGS 
    3959       print_buffer(buff_ptr, MB_MESG_REMOTE_HANDLES, buffProcs[ind], false); 
    3960 #endif   
    39614056      result = unpack_remote_handles(buffProcs[ind], buff_ptr, 
    39624057                                     L2hloc, L2hrem, L2p); 
     
    39694064  } 
    39704065     
     4066#ifdef DEBUG_MPE 
     4067    MPE_Log_event(RHANDLES_END, procConfig.proc_rank(), "Ending remote handles."); 
     4068#endif 
    39714069#ifdef DEBUG_MPE 
    39724070      MPE_Log_event(GHOST_END, procConfig.proc_rank(),  
     
    44184516      else hpair[0] = 0; 
    44194517    } 
    4420     assert(hpair[0] && hpair[1]); 
     4518    if (!(hpair[0] && hpair[1])) return MB_FAILURE; 
    44214519    int this_proc = from_proc; 
    44224520    result = update_remote_data(hpair[0], &this_proc, hpair+1, 1, 0); 
     
    57105808   
    57115809  if (!bad_ents.empty()) { 
    5712     std::cout << "Found bad entities in check_local_shared:" << std::endl; 
     5810    std::cout << "Found bad entities in check_local_shared, proc rank " 
     5811              << procConfig.proc_rank() << "," << std::endl; 
    57135812    std::vector<std::string>::iterator vit; 
    57145813    for (rit = bad_ents.begin(), vit = errors.begin(); rit != bad_ents.end(); rit++, vit++) { 
  • MOAB/trunk/parallel/Makefile.am

    r3256 r3296  
    4747     crystal.h errmem.h types.h 
    4848 
    49   MOAB_PARALLEL_TEST += pcomm_unit parallel_unit_tests uber_parallel_test scdtest 
     49  MOAB_PARALLEL_TEST += pcomm_unit parallel_unit_tests uber_parallel_test scdtest pcomm_serial 
    5050 
    5151if PARALLEL_HDF5 
     
    8989uber_parallel_test_SOURCES = uber_parallel_test.cpp 
    9090uber_parallel_test_LDADD = ../libMOAB.la 
     91pcomm_serial_SOURCES = pcomm_serial.cpp 
     92pcomm_serial_LDADD = ../libMOAB.la 
    9193 
    9294scdtest_SOURCES = scdtest.cpp 
  • MOAB/trunk/parallel/pcomm_unit.cpp

    r3255 r3296  
    4949/** Test pack/unpack of shared entities in 3d*/ 
    5050void test_pack_shared_entities_3d(); 
    51 /** Test pack/unpack of arbitrary mesh file */ 
    52 void test_pack_shared_arbitrary(); 
    5351/** Test filter_pstatus function*/ 
    5452void test_filter_pstatus(); 
     
    7775  num_err += RUN_TEST( test_pack_shared_entities_2d ); 
    7876  num_err += RUN_TEST( test_pack_shared_entities_3d ); 
    79   num_err += RUN_TEST( test_pack_shared_arbitrary ); 
    8077  num_err += RUN_TEST( test_filter_pstatus ); 
    8178   
     
    18631860 
    18641861  for (unsigned int i = 0; i < 4; i++) 
    1865     delete pc[i]; 
    1866 } 
    1867  
    1868 void test_pack_shared_arbitrary() 
    1869 { 
    1870 #define NP 3 
    1871   MBCore moab[NP]; 
    1872   MBParallelComm *pc[NP]; 
    1873   for (unsigned int i = 0; i < NP; i++) { 
    1874     pc[i] = new MBParallelComm(&moab[i]); 
    1875     pc[i]->set_rank(i); 
    1876     pc[i]->set_size(NP); 
    1877   } 
    1878  
    1879   std::vector<int> pa_vec; 
    1880   pa_vec.push_back(ReadParallel::PA_READ); 
    1881   pa_vec.push_back(ReadParallel::PA_GET_FILESET_ENTS); 
    1882   pa_vec.push_back(ReadParallel::PA_DELETE_NONLOCAL); 
    1883   MBErrorCode rval; 
    1884   std::vector<int> partition_tag_vals; 
    1885   bool partition_distrib = false; 
    1886  
    1887 #ifdef SRCDIR 
    1888   const char *fnames[] = {STRINGIFY(SRCDIR) "/ptest.cub"}; 
    1889 #else 
    1890   const char *fnames[] = {"./ptest.cub"}; 
    1891 #endif 
    1892    
    1893   std::string ptag_name("GEOM_DIMENSION"); 
    1894   partition_tag_vals.push_back(3); 
    1895   partition_distrib = true; 
    1896    
    1897     //std::string ptag_name("MATERIAL_SET"); 
    1898     //partition_distrib = true; 
    1899    
    1900   FileOptions fopts(NULL); 
    1901    
    1902   for (unsigned int i = 0; i < NP; i++) { 
    1903     ReadParallel rp(moab+i, pc[i]); 
    1904     MBEntityHandle tmp_set = 0; 
    1905     rval = rp.load_file(fnames, 1, tmp_set, ReadParallel::POPT_READ_DELETE, 
    1906                         ptag_name,  
    1907                         partition_tag_vals, partition_distrib, false, pa_vec,  
    1908                         fopts, NULL, 0, NULL, i, false, -1, -1, -1, -1, 0); 
    1909     CHECK_ERR(rval); 
    1910   } 
    1911    
    1912   rval = MBParallelComm::resolve_shared_ents(pc, NP, 3); 
    1913   CHECK_ERR(rval); 
    1914  
    1915     // exchange interface cells 
    1916   rval = MBParallelComm::exchange_ghost_cells(pc, NP, -1, -1, 0, true); 
    1917   CHECK_ERR(rval); 
    1918    
    1919     // now 1 layer of hex ghosts 
    1920   rval = MBParallelComm::exchange_ghost_cells(pc, NP, 3, 0, 1, true); 
    1921   CHECK_ERR(rval); 
    1922  
    1923   for (unsigned int i = 0; i < NP; i++) 
    19241862    delete pc[i]; 
    19251863}