2025年dockerdesktop运行错误(docker desktop is shutting down)

dockerdesktop运行错误(docker desktop is shutting down)coding UTF 8 import multiprocess as mpfrom multiprocess import Queueimport BeautifulSou as bs4from Queue import Emptyimport urllibimport json getPageData 类似于生产者 获取工作列表

大家好,我是讯享网,很高兴认识大家。

# -*- coding: UTF-8 -*-



import multiprocessing as mp

from multiprocessing import Queue

import BeautifulSoup as bs4

from Queue import Empty

import urllib

import json



'''
   getPageData 类似于生产者,获取工作列表。 getDetail 类似消费者,去获取工作的详细信息。queue 中存放工作详情页面的ID
'''



# Lagou job-list AJAX endpoint (default sort, salary filter fixed at 10k-15k).
# NOTE: straight quotes restored (typographic quotes are a syntax error) and
# the query parameter de-mangled to 'needAddtionalResult' (the scrape turned
# every "on" into "On" throughout this file).
interface = 'http://www.lagou.com/jobs/positionAjax.json?px=default&yx=10k-15k&needAddtionalResult=false'

# Template for one job's detail page; {0} is the position ID.
detailUrl = 'http://www.lagou.com/jobs/{0}.html'





def getPageData(task, queue, keyword='python'):
    """Producer: fetch one page of the Lagou job list per queued page number.

    Pulls page numbers from ``task`` until it stays empty for one second,
    POSTs each to the list API, and pushes every job's ``positionId`` onto
    ``queue`` for the getDetail consumers.

    :param task:    Queue of page numbers (int) to fetch.
    :param queue:   Queue receiving position IDs found on each page.
    :param keyword: Search keyword sent as the 'kd' form field.
    """
    while True:
        try:
            page = task.get(timeout=1)
        except Empty:
            # No page arrived within the timeout -> producer is done.
            break
        post_data = {'kd': keyword, 'pn': page, 'first': 'false'}
        opener = urllib.urlopen(interface, urllib.urlencode(post_data))
        # Fixed: the response was assigned to mangled name 'jsOnData' but
        # read back as 'jsonData', which raised NameError.
        jsonData = json.loads(opener.read())
        results = jsonData['content']['positionResult']['result']
        for result in results:
            queue.put(result['positionId'])





def getDetail(queue, result):

    while True:

        try:

            positiOnId= queue.get(timeout=1)

        except Empty:

            print mp.current_process().name + ‘exit’

            break

        url = detailUrl.format(positionId)

        print url, mp.current_process().name

        opener = urllib.urlopen(url)

        html = opener.read()

        soup = bs4.BeautifulSoup(html)

        cOntent= soup.findAll(attrs={“class”: “job_bt”})[0]

        result.put(‘{0} {1}’.format(detailUrl.format(positionId), content))





def start(keyword=‘python’):

    task = Queue()

    queue = Queue()

    result = Queue()



    post_data = {‘kd’: keyword, ‘pn’: 1, ‘first’: ‘true’}

    opener = urllib.urlopen(interface, urllib.urlencode(post_data))

    jsOnData= json.loads(opener.read())



    # 页数

    totalCount = jsonData[‘content’][‘positionResult’][‘totalCount’]

    resultSize = jsonData[‘content’][‘positionResult’][‘resultSize’]

    pageNums = totalCount / resultSize

    if totalCount % resultSize:

        pageNums += 1

    results = jsonData[‘content’][‘positionResult’][‘result’]

    for r in results:

        queue.put(r[‘positionId’])



    # 调试前三页

    pageNums = 3

    for i in range(2, pageNums + 1):

        task.put(i)



    num_cOnsumers= mp.cpu_count()

    processes = [mp.Process(target=getDetail, args=(queue, result))

                 for _ in range(num_consumers)]

    processes.append(mp.Process(target=getPageData, args=(task, queue)))

    for p in processes:

        p.start()

    for p in processes:

        p.join()

    print ‘processes over’



    with open(‘jobs’, ‘w+’) as f:

        while not result.empty():

            a = result.get()

            f.write(a)



# Fixed: the scrape stripped the dunder underscores; 'if name == "main"'
# raises NameError and never guards anything.
if __name__ == '__main__':
    start()


讯享网

小讯
上一篇 2025-05-16 08:55
下一篇 2025-04-29 17:11

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/156463.html