"""Biquge (笔趣阁) novel-site crawler.

NOTE(review): recovered from a line-collapsed web scrape. The tail of
``Get_url3`` was truncated in the original; the final ``append`` and the
``return`` are reconstructed from the parallel code in ``Get_content``
and must be confirmed against the original source.
"""
import os    # imported by the original; not used in the visible code
import time  # imported by the original; not used in the visible code

import requests
from bs4 import BeautifulSoup


def Get_content(url):
    """Return the category-page hrefs listed in the site's top nav bar.

    Parameters
    ----------
    url : str
        Front page URL; the page body is GBK-encoded.

    Returns
    -------
    list[str]
        ``href`` of the first ``<a>`` in each ``<li>`` under every
        ``<div class="nav">``.
    """
    nav_urls = []
    # timeout added: a crawler without one can hang forever on a dead host
    res = requests.get(url, timeout=10).content.decode('gbk')
    soup = BeautifulSoup(res, "html.parser")
    for nav_div in soup.find_all("div", attrs={"class": "nav"}):
        for li in nav_div.find_all("li"):
            nav_urls.append(li.a.get("href"))
    return nav_urls


def Get_url3(urls):
    """Collect book-page hrefs from each category page in *urls*.

    Parameters
    ----------
    urls : iterable of str
        Category-page URLs (as produced by :func:`Get_content`);
        pages are GBK-encoded.

    Returns
    -------
    list[str]
        Book-page hrefs gathered from the "hot" and genre sections.
        (Reconstructed — see NOTE below.)
    """
    urls3 = []
    for url in urls:
        res = requests.get(url, timeout=10).content.decode('gbk')
        soup = BeautifulSoup(res, "html.parser")
        # NOTE(review): this assignment appeared commented out in the
        # original, yet `contents` is used below — restored as live code;
        # confirm against the original source.
        contents = soup.find_all("div", attrs={"class": "l"})          # hot novels (热门小说)
        contents2 = soup.find_all("div", attrs={"class": "r"})         # fantasy/xianxia/urban romance (玄幻/仙侠/都市言情)
        contents3 = soup.find_all("div", attrs={"class": "novelslist"})  # recently updated novels (更新小说)
        contents4 = soup.find_all("div", attrs={"id": "newscontent"})
        for i, content in enumerate(contents):
            for dt in content.find_all("dt"):
                try:
                    urls3.append(dt.a.get("href"))
                except AttributeError:
                    # a <dt> without an <a> makes `dt.a` None; original
                    # caught bare Exception and printed the section index
                    print(i)
        for c in contents2:
            for li in c.find_all("li"):
                # reconstructed: original text was truncated at "li.a"
                urls3.append(li.a.get("href"))
        # TODO(review): the original source is cut off here —
        # contents3/contents4 were collected but whatever processed them
        # is missing. Recover from the original before relying on this.
    return urls3  # reconstructed; confirm placement against the original
讯享网
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/50249.html