Просмотр исходного кода

status: Use multiprocessing for `repo status -j<num>` instead of threading

This change speeds up the command by parallelizing it with processes
instead of threads.  Parallelization with threads doesn't work well:
increasing the number of jobs (to 8 threads or more) didn't increase the
speed.  Possibly, Python's global interpreter lock is the cause.

Bug: https://crbug.com/gerrit/12389
Change-Id: Icbe5df8ba037dd91422b96f4e43708068d7be924
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/279936
Tested-by: Kimiyuki Onaka <kimiyuki@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Kimiyuki Onaka 5 лет назад
Родитель
Commit
0501b29e7a
2 измененных файлов с 22 добавлено и 33 удалено
  1. 7 0
      project.py
  2. 15 33
      subcmds/status.py

+ 7 - 0
project.py

@@ -3208,6 +3208,13 @@ class Project(object):
       self._bare = bare
       self._bare = bare
       self._gitdir = gitdir
       self._gitdir = gitdir
 
 
+    # __getstate__ and __setstate__ are required for pickling because __getattr__ exists.
+    def __getstate__(self):
+      return (self._project, self._bare, self._gitdir)
+
+    def __setstate__(self, state):
+      self._project, self._bare, self._gitdir = state
+
     def LsOthers(self):
     def LsOthers(self):
       p = GitCommand(self._project,
       p = GitCommand(self._project,
                      ['ls-files',
                      ['ls-files',

+ 15 - 33
subcmds/status.py

@@ -16,17 +16,13 @@
 
 
 from __future__ import print_function
 from __future__ import print_function
 
 
+import functools
 import glob
 import glob
-import itertools
+import multiprocessing
 import os
 import os
 
 
 from command import PagedCommand
 from command import PagedCommand
 
 
-try:
-  import threading as _threading
-except ImportError:
-  import dummy_threading as _threading
-
 from color import Coloring
 from color import Coloring
 import platform_utils
 import platform_utils
 
 
@@ -95,25 +91,20 @@ the following meanings:
     p.add_option('-q', '--quiet', action='store_true',
     p.add_option('-q', '--quiet', action='store_true',
                  help="only print the name of modified projects")
                  help="only print the name of modified projects")
 
 
-  def _StatusHelper(self, project, clean_counter, sem, quiet):
+  def _StatusHelper(self, quiet, project):
     """Obtains the status for a specific project.
     """Obtains the status for a specific project.
 
 
     Obtains the status for a project, redirecting the output to
     Obtains the status for a project, redirecting the output to
-    the specified object. It will release the semaphore
-    when done.
+    the specified object.
 
 
     Args:
     Args:
+      quiet: If true, only print the name of modified projects.
       project: Project to get status of.
       project: Project to get status of.
-      clean_counter: Counter for clean projects.
-      sem: Semaphore, will call release() when complete.
-      output: Where to output the status.
+
+    Returns:
+      The status of the project.
     """
     """
-    try:
-      state = project.PrintWorkTreeStatus(quiet=quiet)
-      if state == 'CLEAN':
-        next(clean_counter)
-    finally:
-      sem.release()
+    return project.PrintWorkTreeStatus(quiet=quiet)
 
 
   def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
   def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
     """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
     """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
@@ -133,27 +124,18 @@ the following meanings:
 
 
   def Execute(self, opt, args):
   def Execute(self, opt, args):
     all_projects = self.GetProjects(args)
     all_projects = self.GetProjects(args)
-    counter = itertools.count()
+    counter = 0
 
 
     if opt.jobs == 1:
     if opt.jobs == 1:
       for project in all_projects:
       for project in all_projects:
         state = project.PrintWorkTreeStatus(quiet=opt.quiet)
         state = project.PrintWorkTreeStatus(quiet=opt.quiet)
         if state == 'CLEAN':
         if state == 'CLEAN':
-          next(counter)
+          counter += 1
     else:
     else:
-      sem = _threading.Semaphore(opt.jobs)
-      threads = []
-      for project in all_projects:
-        sem.acquire()
-
-        t = _threading.Thread(target=self._StatusHelper,
-                              args=(project, counter, sem, opt.quiet))
-        threads.append(t)
-        t.daemon = True
-        t.start()
-      for t in threads:
-        t.join()
-    if not opt.quiet and len(all_projects) == next(counter):
+      with multiprocessing.Pool(opt.jobs) as pool:
+        states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects)
+        counter += states.count('CLEAN')
+    if not opt.quiet and len(all_projects) == counter:
       print('nothing to commit (working directory clean)')
       print('nothing to commit (working directory clean)')
 
 
     if opt.orphans:
     if opt.orphans: