2025年5i5j网页数据获取及存储

5i5j网页数据获取及存储:使用 `urllib.request` 抓取页面,用 `lxml.etree` 解析,再通过 `pymysql_conn` 中的 `Mysql_connect` 写入数据库。比较得出 URL 地址规律:`base_url = 'https://bj.5i5j.com/zufang/huilongguan/n{}'`。

大家好,我是讯享网,很高兴认识大家。
"""Scrape the first three 5i5j rental listing pages and store them in MySQL.

Each listing's title, region, area, zone and price are read with XPath and
inserted into the ``data_5i5j`` table through ``Mysql_connect``.
"""
from urllib import request

from lxml import etree

from pymysql_conn import Mysql_connect

# URL pattern worked out by comparing the addresses of successive pages:
#   https://bj.5i5j.com/zufang/huilongguan/n2/
#   https://bj.5i5j.com/zufang/huilongguan/n3/
base_url = 'https://bj.5i5j.com/zufang/huilongguan/n{}'

# NOTE(review): the Cookie is a captured browser session value hard-coded in
# source; it will expire and probably should not live in version control.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
    'Cookie': '_Jo0OQK=791C6F15B09A2D259A9391EF5EEC575BFD1F7A88AAC94A312F1AD3242E4DA293BCD1CAC76497EFE5D65969EE39F8380D5847F563B66A58E82A9D79DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1Xw==; PHPSESSID=g6oa0bq1hbcemnudl12l3sb4gf; domain=bj; _ga=GA1.2..; _gid=GA1.2..; _gat=1; yfx_c_g_u_id_=_ck; yfx_f_l_v_t_=f_t_75__r_t_75__v_t_75__r_c_0; Hm_lvt_94ed3da86ed341d64b267ec6=; Hm_lpvt_94ed3da86ed341d64b267ec6='
}

# Insert statement with driver-side placeholders. The values are scraped text,
# so they are passed as parameters instead of being spliced in with repr():
# Python repr quoting is not SQL quoting, and None would become the bare
# token ``None`` rather than NULL.
sql = 'insert into data_5i5j (title,region,square,zone0,price) VALUES (%s,%s,%s,%s,%s)'

# Create the database helper object.
mysql_object = Mysql_connect()


def _fetch_listing_items(url):
    """Download one result page and return its listing ``<li>`` elements."""
    req = request.Request(url, headers=headers)
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(req) as response:
        html_str = response.read().decode('utf-8')
    # Build the etree document, then locate the listing items in it.
    html_ele = etree.HTML(html_str)
    return html_ele.xpath('//ul[@class="pList"]/li')


def _extract_row(li_ele):
    """Return ``(title, region, square, zone, price)`` for one listing.

    Raises:
        IndexError: if the listing lacks one of the expected nodes, or its
            summary text does not contain the `` · `` separator.
    """
    title = li_ele.xpath('./div[2]/h3/a')[0].text
    # The first summary paragraph holds region and area separated by ' · ';
    # evaluate the XPath once and reuse the pieces.
    summary_parts = li_ele.xpath('./div[2]/div[1]/p[1]/text()')[0].split(' · ')
    region = summary_parts[0].replace(' ', '')
    # Drops the last two characters -- presumably the area unit suffix;
    # confirm against the live page.
    square = summary_parts[1].replace(' ', '')[0:-2]
    zone = li_ele.xpath('./div[2]/div[1]/p[2]/a')[0].text
    price = li_ele.xpath('./div[2]/div[1]/div/p[1]/strong')[0].text
    return (title, region, square, zone, price)


for i in range(1, 4):  # pages 1 through 3
    for li_ele in _fetch_listing_items(base_url.format(i)):
        row = _extract_row(li_ele)
        # Write to the database. Mysql_connect.execute_sql(sql) takes only a
        # finished statement, so the helper's cursor is used directly to hand
        # the values to the driver for escaping; commit per row as before.
        mysql_object.cursor.execute(sql, row)
        mysql_object.db.commit()

讯享网
import pymysql


class Mysql_connect(object):
    """Small helper that owns one pymysql connection and one cursor.

    The connection targets the ``db815`` database on a local MySQL server.
    Both objects are released by ``close()``, which also runs when the
    instance is garbage-collected.
    """

    def __init__(self):
        # Define both attributes before connecting so close()/__del__ stay
        # safe when pymysql.connect() raises.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host='127.0.0.1', user='root', password='', port=3306, database='db815', charset='utf8')
        self.cursor = self.db.cursor()

    def execute_sql(self, sql, params=None):
        """Execute one statement and commit it.

        Args:
            sql: The SQL text. May contain ``%s`` placeholders when
                ``params`` is given.
            params: Optional sequence or mapping of values for the driver to
                escape and substitute. ``None`` (the default) sends ``sql``
                unchanged, which is the original behaviour.
        """
        self.cursor.execute(sql, params)
        self.db.commit()

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        # getattr guards against instances whose __init__ never completed.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear the references first so a second call does nothing.
        self.cursor = None
        self.db = None
        if cursor is not None:
            cursor.close()
        if db is not None:
            db.close()

    def __del__(self):
        self.close()


if __name__ == '__main__':
    # Manual smoke test: insert a single row through the helper.
    con_test = Mysql_connect()
    sql = 'insert into xueqiu (id)value(3)'
    con_test.execute_sql(sql)
小讯
上一篇 2025-03-31 07:16
下一篇 2025-02-21 20:30

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/52996.html