Проблема с использованием блокировки в multiprocessing.Pool: ошибка сериализации (PicklingError)

Я пишу модуль на Python для извлечения тегов из большого корпуса текстов; результаты получаются качественными, но работает он очень медленно. Я пытаюсь ускорить процесс с помощью multiprocessing, и это работало, пока я не попробовал ввести блокировку, чтобы к нашей базе данных одновременно подключался только один процесс. Никак не могу понять, как заставить это работать: несмотря на долгие поиски и эксперименты, я по-прежнему получаю PicklingError: Can't pickle <type 'thread.lock'>: attribute lookup thread.lock failed. Вот проблемный код — он работал нормально, пока я не попытался передать объект блокировки в качестве аргумента для f.

def make_network(initial_tag, max_tags = 2, max_iter = 3):
    """Print a network of tags grown level by level from ``initial_tag``.

    At each level ``get_more_tags`` is mapped over the current tags through
    the pool; every result is printed as ``<searched tag>|<new tag>`` and the
    new tags become the input of the next level. Nothing is returned.

    ``max_tags`` is forwarded unchanged to ``get_more_tags``; ``max_iter`` is
    the number of levels that get expanded.

    Raises StopIteration when ``max_iter <= 0``: the first call at the bottom
    of this function is not wrapped in the ``try``/``except`` that absorbs
    the signal for deeper levels.

    ``Manager``, ``partial`` and ``get_more_tags`` are not defined in this
    block and must be provided by the enclosing module.
    """
    # The lock and the pool are both obtained from the same manager object.
    manager = Manager()
    lock = manager.Lock()
    pool = manager.Pool(8)

    # this is a very expensive function that I would like to parallelize
    # over a list of tags. It involves a (relatively cheap) call to an external
    # database, which needs a lock to avoid simultaneous queries. It takes a list
    # of strings (tags) as its sole argument, and returns a list of sets with entries
    # corresponding to the input list.
    # NOTE(review): binding the lock into f means the lock travels with f on
    # every pool.map call below. The accompanying description reports a
    # PicklingError for thread.lock once the lock is passed this way --
    # presumably related to combining manager.Lock() with manager.Pool();
    # confirm against the multiprocessing documentation.
    f = partial(get_more_tags, max_tags = max_tags, lock = lock) 

    def _recursively_find_more_tags(tags, level):
        # StopIteration doubles as the "depth limit reached" signal; the call
        # one level up catches it in the try/except at the end of this function.
        if level >= max_iter:
            raise StopIteration
        new_tags = pool.map(f, tags)
        to_search = []
        # Pair every searched tag with the collection returned for it.
        for i, s in zip(tags, new_tags):
            for t in s:
                # Each result entry is joined with spaces into a single tag string.
                joined = ' '.join(t)
                print i + "|" + joined
                to_search.append(joined)
        try:
            return _recursively_find_more_tags(to_search, level+1)
        except StopIteration:
            # The next level hit the depth limit: stop quietly.
            return None

    # Start at level 0 with the seed tag; the result is discarded.
    _recursively_find_more_tags([initial_tag], 0)

"""Skeleton: expand a tag network with a process pool and a shared database lock."""
import multiprocessing
from functools import partial

lock = None  # Global definition of lock (set in the parent and in every worker)
pool = None  # Global definition of pool (set by make_network)


def _init_worker(shared_lock):
    """Store the lock handed over at worker start-up in this process's globals.

    Passing the lock through the pool initializer hands it to the workers
    when they are created, so it never has to be pickled as a task argument,
    and it works with every process start method (fork, spawn, forkserver).
    """
    global lock
    lock = shared_lock


def get_more_tags(tag, max_tags=2):
    """Placeholder for the expensive per-tag search.

    Lives at module level because ``Pool.map`` pickles the callable by its
    qualified name; a function nested inside ``make_network`` cannot be sent
    to worker processes.

    Args:
        tag: The tag (string) to expand.
        max_tags: Upper bound on the number of related tags to return.

    Returns:
        A set of word sequences, one per related tag. The placeholder returns
        an empty set so the skeleton runs end to end.
    """
    # Hold the lock only around the (relatively cheap) database call so the
    # expensive processing still runs in parallel across workers.
    with lock:
        pass  # TODO: query the external database for `tag` here
    # TODO: the expensive processing of the query result goes here
    return set()


def make_network(initial_tag, max_tags=2, max_iter=3):
    """Print a network of tags grown level by level from ``initial_tag``.

    At each level ``get_more_tags`` is mapped over the current tags in a pool
    of 8 worker processes; every result is printed as
    ``<searched tag>|<new tag>`` and the new tags feed the next level.

    Args:
        initial_tag: Seed tag (string) for level 0.
        max_tags: Forwarded to ``get_more_tags``.
        max_iter: Number of levels to expand. Values <= 0 do no work and
            return immediately instead of leaking ``StopIteration``.

    Returns:
        None. The network is reported through ``print`` only.

    Side effects:
        Rebinds the module globals ``lock`` and ``pool``. The pool is closed
        and joined before returning, even if a worker raises.
    """
    global lock
    global pool

    if max_iter <= 0:
        # Nothing to expand; do not spawn worker processes for no work.
        return None

    lock = multiprocessing.Lock()
    pool = multiprocessing.Pool(8, initializer=_init_worker, initargs=(lock,))

    # this is a very expensive function that I would like to parallelize
    # over a list of tags. It involves a (relatively cheap) call to an external
    # database, which needs a lock to avoid simultaneous queries.
    f = partial(get_more_tags, max_tags=max_tags)

    try:
        tags = [initial_tag]
        for _ in range(max_iter):
            if not tags:
                break  # the previous level produced nothing new
            new_tags = pool.map(f, tags)
            to_search = []
            # pool.map preserves input order, so results line up with tags.
            for i, s in zip(tags, new_tags):
                for t in s:
                    joined = ' '.join(t)
                    print(i + "|" + joined)
                    to_search.append(joined)
            tags = to_search
    finally:
        # Always release the worker processes.
        pool.close()
        pool.join()
    return None

Ответ 1