Changeset 1676

Show
Ignore:
Timestamp:
10/30/09 15:01:36 (3 weeks ago)
Author:
buettner
Message:

Fixing #413: making sure that drain choices immediately impact other jobs.

Before this change, when the first job selects a drain partition, all other jobs are given the opportunity to start, so the second-highest job can't claim its future resource.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/src/lib/Components/bg_base_system.py

    r1665 r1676  
    664664        # first time through, try for starting jobs based on utility scores 
    665665        drain_partitions = set() 
    666         # the sets draining_jobs and cannot_start are for efficiency, not correctness 
    667         draining_jobs = set() 
    668         cannot_start = set() 
    669         for idx in range(len(arg_list)): 
    670             winning_job = arg_list[idx] 
    671             for jj in range(idx, len(arg_list)): 
    672                 job = arg_list[jj] 
    673                  
    674                 if job['jobid'] not in cannot_start: 
    675                     partition_name = self._find_job_location(job, drain_partitions) 
    676                     if partition_name: 
    677                         best_partition_dict.update(partition_name) 
    678                         break 
    679                  
    680                 cannot_start.add(job['jobid']) 
    681                  
    682                 # we already picked a drain location for the winning job 
    683                 if winning_job['jobid'] in draining_jobs: 
    684                     continue 
    685  
    686                 location = self._find_drain_partition(winning_job) 
    687                 if location is not None: 
    688                     for p_name in location.parents: 
    689                         drain_partitions.add(self.cached_partitions[p_name]) 
    690                     for p_name in location.children: 
    691                         drain_partitions.add(self.cached_partitions[p_name]) 
    692                         self.cached_partitions[p_name].draining = True 
    693                     drain_partitions.add(location) 
    694                     #self.logger.info("job %s is draining %s" % (winning_job['jobid'], location.name)) 
    695                     location.draining = True 
    696                     draining_jobs.add(winning_job['jobid']) 
    697                      
    698  
     666         
     667        for job in arg_list: 
     668            partition_name = self._find_job_location(job, drain_partitions) 
     669            if partition_name: 
     670                best_partition_dict.update(partition_name) 
     671                break 
    699672             
    700             # at this time, we only want to try launching one job at a time 
    701             if best_partition_dict: 
    702                 break 
     673            location = self._find_drain_partition(job) 
     674            if location is not None: 
     675                for p_name in location.parents: 
     676                    drain_partitions.add(self.cached_partitions[p_name]) 
     677                for p_name in location.children: 
     678                    drain_partitions.add(self.cached_partitions[p_name]) 
     679                    self.cached_partitions[p_name].draining = True 
     680                drain_partitions.add(location) 
     681                #self.logger.info("job %s is draining %s" % (winning_job['jobid'], location.name)) 
     682                location.draining = True 
    703683         
    704684        # the next time through, try to backfill, but only if we couldn't find anything to start