Changeset 4683

Show
Ignore:
Timestamp:
06/11/09 01:30:08 (9 months ago)
Author:
balaji
Message:

Fixed a bug in the MPMD launch with Hydra. The executables were not
being correctly spread across the available partitions.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • mpich2/trunk/src/pm/hydra/ui/utils/uiu.c

    r4601 r4683  
    206206 
    207207 
     208static HYD_Status add_exec_info_to_partition(struct HYD_Exec_info *exec_info, 
     209                                             struct HYD_Partition *partition, 
     210                                             int num_procs) 
     211{ 
     212    int i; 
     213    struct HYD_Partition_exec *exec; 
     214    HYD_Status status = HYD_SUCCESS; 
     215 
     216    if (partition->exec_list == NULL) { 
     217        status = HYDU_alloc_partition_exec(&partition->exec_list); 
     218        HYDU_ERR_POP(status, "unable to allocate partition exec\n"); 
     219 
     220        partition->exec_list->pgid = 0; /* This is the COMM_WORLD exec */ 
     221 
     222        for (i = 0; exec_info->exec[i]; i++) 
     223            partition->exec_list->exec[i] = HYDU_strdup(exec_info->exec[i]); 
     224        partition->exec_list->exec[i] = NULL; 
     225 
     226        partition->exec_list->proc_count = num_procs; 
     227        partition->exec_list->prop = exec_info->prop; 
     228        partition->exec_list->prop_env = HYDU_env_list_dup(exec_info->prop_env); 
     229    } 
     230    else { 
     231        for (exec = partition->exec_list; exec->next; exec = exec->next); 
     232        status = HYDU_alloc_partition_exec(&exec->next); 
     233        HYDU_ERR_POP(status, "unable to allocate partition exec\n"); 
     234 
     235        exec = exec->next; 
     236        exec->pgid = 0; /* This is the COMM_WORLD exec */ 
     237 
     238        for (i = 0; exec_info->exec[i]; i++) 
     239            exec->exec[i] = HYDU_strdup(exec_info->exec[i]); 
     240        exec->exec[i] = NULL; 
     241 
     242        exec->proc_count = num_procs; 
     243        exec->prop = exec_info->prop; 
     244        exec->prop_env = HYDU_env_list_dup(exec_info->prop_env); 
     245    } 
     246 
     247  fn_exit: 
     248    return status; 
     249 
     250  fn_fail: 
     251    goto fn_exit; 
     252} 
     253 
     254 
    208255HYD_Status HYD_UIU_merge_exec_info_to_partition(void) 
    209256{ 
    210     int run_count, i, rem; 
     257    int partition_rem_procs, exec_rem_procs; 
    211258    struct HYD_Partition *partition; 
    212259    struct HYD_Exec_info *exec_info; 
    213     struct HYD_Partition_exec *exec; 
    214260    HYD_Status status = HYD_SUCCESS; 
    215261 
     
    219265        HYD_handle.global_core_count += partition->partition_core_count; 
    220266 
    221     for (exec_info = HYD_handle.exec_info_list; exec_info; exec_info = exec_info->next) { 
    222         /* The run_count tells us how many processes the partitions 
    223          * before us can host */ 
    224         run_count = 0; 
    225         for (partition = HYD_handle.partition_list; partition; partition = partition->next) { 
    226             if (run_count >= exec_info->exec_proc_count) 
     267    partition = HYD_handle.partition_list; 
     268    exec_info = HYD_handle.exec_info_list; 
     269    partition_rem_procs = partition->partition_core_count; 
     270    exec_rem_procs = exec_info->exec_proc_count; 
     271    while (1) { 
     272        if (exec_rem_procs <= partition_rem_procs) { 
     273            status = add_exec_info_to_partition(exec_info, partition, exec_rem_procs); 
     274            HYDU_ERR_POP(status, "unable to add executable to partition\n"); 
     275 
     276            partition_rem_procs -= exec_info->exec_proc_count; 
     277            if (partition_rem_procs == 0) 
     278                partition = HYD_handle.partition_list; 
     279 
     280            exec_info = exec_info->next; 
     281            if (exec_info == NULL) 
    227282                break; 
    228  
    229             if (partition->exec_list == NULL) { 
    230                 status = HYDU_alloc_partition_exec(&partition->exec_list); 
    231                 HYDU_ERR_POP(status, "unable to allocate partition exec\n"); 
    232  
    233                 partition->exec_list->pgid = 0; /* This is the COMM_WORLD exec */ 
    234  
    235                 for (i = 0; exec_info->exec[i]; i++) 
    236                     partition->exec_list->exec[i] = HYDU_strdup(exec_info->exec[i]); 
    237                 partition->exec_list->exec[i] = NULL; 
    238  
    239                 partition->exec_list->proc_count = 
    240                     ((exec_info->exec_proc_count / HYD_handle.global_core_count) * 
    241                      partition->partition_core_count); 
    242                 rem = (exec_info->exec_proc_count % HYD_handle.global_core_count); 
    243                 if (rem > run_count + partition->partition_core_count) 
    244                     rem = run_count + partition->partition_core_count; 
    245                 partition->exec_list->proc_count += (rem > run_count) ? (rem - run_count) : 0; 
    246  
    247                 partition->exec_list->prop = exec_info->prop; 
    248                 partition->exec_list->prop_env = HYDU_env_list_dup(exec_info->prop_env); 
    249             } 
    250             else { 
    251                 for (exec = partition->exec_list; exec->next; exec = exec->next); 
    252                 status = HYDU_alloc_partition_exec(&exec->next); 
    253                 HYDU_ERR_POP(status, "unable to allocate partition exec\n"); 
    254  
    255                 exec = exec->next; 
    256                 exec->pgid = 0; /* This is the COMM_WORLD exec */ 
    257  
    258                 for (i = 0; exec_info->exec[i]; i++) 
    259                     exec->exec[i] = HYDU_strdup(exec_info->exec[i]); 
    260                 exec->exec[i] = NULL; 
    261  
    262                 exec->proc_count = 
    263                     ((exec_info->exec_proc_count / HYD_handle.global_core_count) * 
    264                      partition->partition_core_count); 
    265                 rem = (exec_info->exec_proc_count % HYD_handle.global_core_count); 
    266                 if (rem > run_count + partition->partition_core_count) 
    267                     rem = run_count + partition->partition_core_count; 
    268                 exec->proc_count += (rem > run_count) ? (rem - run_count) : 0; 
    269  
    270                 exec->prop = exec_info->prop; 
    271                 exec->prop_env = HYDU_env_list_dup(exec_info->prop_env); 
    272             } 
    273  
    274             run_count += partition->partition_core_count; 
     283        } 
     284        else { 
     285            status = add_exec_info_to_partition(exec_info, partition, partition_rem_procs); 
     286            HYDU_ERR_POP(status, "unable to add executable to partition\n"); 
     287 
     288            exec_rem_procs -= partition_rem_procs; 
     289 
     290            partition = partition->next; 
     291            if (partition == NULL) 
     292                partition = HYD_handle.partition_list; 
     293            partition_rem_procs = partition->partition_core_count; 
    275294        } 
    276295    }