爬取小猪短租网信息
admin
2023-07-13 01:23:22
0
# -*- coding: utf-8 -*-
import time
import lxml
import requests
from bs4 import BeautifulSoup

# HTTP headers passed to every requests.get() call in get_links() and get_info():
# a desktop Chrome User-Agent string plus a hard-coded Cookie string.
# NOTE(review): the Cookie value has spaces around '=' and '-' (e.g. "N % 2FA"),
# which looks like auto-formatter damage to the original cookie -- confirm
# against a real browser cookie before relying on it.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
          'Cookie': 'gr_user_id = c6f58a39 - ea25 - 4f58 - b448 - 545070192c4e;59a81cc7d8c04307ba183d331c373ef6_gr_session_id = e8e4b66f - 440a - 4ae7 - a76a - fe2dd2b34a26;59a81cc7d8c04307ba183d331c373ef6_gr_last_sent_sid_with_cs1 = e8e4b66f - 440a - 4ae7 - a76a - fe2dd2b34a26;59a81cc7d8c04307ba183d331c373ef6_gr_last_sent_cs1 = N % 2FA;59a81cc7d8c04307ba183d331c373ef6_gr_session_id_e8e4b66f - 440a - 4ae7 - a76a - fe2dd2b34a26 = true;grwng_uid = 9ec14ad9 - 5ac0 - 4bb1 - 81c1 - bc60d2685710;abtest_ABTest4SearchDate = b;xzuuid = 79426b52;_uab_collina = 154660443606130958890473;TY_SESSION_ID = 907f32df - c060 - 49ca - b945 - 98215cc03475;rule_math = pvzq3r06hi'}

def get_links(url):
    """Fetch one search-result page and scrape every listing it links to.

    Each anchor matched by ``#page_list > ul > li > a`` is treated as a
    listing detail link: its ``href`` is handed to ``get_info`` and then
    printed.

    Args:
        url: URL of a search-result page.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` when the page cannot be fetched (including a
            timeout).
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server and the whole crawl hangs.
    web_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        href = link.get('href')
        if not href:
            # An anchor without an href has nothing to fetch; passing None on
            # to requests would raise instead of scraping anything.
            continue
        get_info(href)
        print(href)

def judgment_sex(class_name):
    """Translate the host's gender-icon CSS class into a gender label.

    Args:
        class_name: Value of the icon element's ``class`` attribute. A list
            of class names is expected (this is what ``Tag.get("class")``
            is called with in this file); a whitespace-separated string is
            also accepted. ``None`` or any other type yields ``None``.

    Returns:
        '女' if the classes include ``member_girl_ico``, '男' if they include
        ``member_boy_ico``, otherwise ``None``.
    """
    if isinstance(class_name, str):
        classes = class_name.split()
    elif isinstance(class_name, (list, tuple, set, frozenset)):
        classes = class_name
    else:
        return None

    # Membership rather than whole-list equality: the icon may carry extra
    # CSS classes alongside the gender marker.
    if 'member_girl_ico' in classes:
        return '女'
    if 'member_boy_ico' in classes:
        return '男'
    return None

def get_info(url):
    """Fetch a listing detail page and print the scraped fields as a dict.

    Six CSS selectors are run against the page (title, address, price, host
    avatar, host name, host gender icon). The results are combined with
    ``zip``, so one dict is printed per complete set of matches; if any
    selector matches nothing, nothing is printed for this page.

    Args:
        url: URL of a listing detail page.

    Returns:
        None. Each record is printed to stdout.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` when the page cannot be fetched (including a
            timeout).
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server and the whole crawl hangs.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'html.parser')

    tittles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h5 > em')
    addresses = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    prices = soup.select('#pricePart > div.day_l > span')
    imgs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h7 > a')
    sexs = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h7 > span')

    # select() always returns a list, so the fields are walked in lockstep;
    # zip stops at the shortest list.
    for tittle, address, price, img, name, sex in zip(tittles, addresses, prices, imgs, names, sexs):
        # The 'tittle' key spelling is part of the printed output; keep it.
        data = {
            'tittle': tittle.get_text().strip(),
            'address': address.get_text().strip(),
            'price': price.get_text().strip(),
            'img': img.get("src"),
            'name': name.get_text().strip(),
            'sex': judgment_sex(sex.get("class")),
        }
        print(data)
if __name__ == '__main__':
    # Crawl search-result pages 1 through 13, printing a separator after
    # each page and pausing one second before the next.
    page_template = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'
    for page_number in range(1, 14):
        get_links(page_template.format(page_number))
        print("------------------这里是1页分割线-----------------------")
        time.sleep(1)

    #f = open(r'C:\Users\PC\Desktop\file1.txt','a+', encoding="utf-8")
    #f.write(a)
    #f.close()
    #get_info('http://bj.xiaozhu.com/fangzi/29762014101.html')

相关内容

热门资讯

问题居然在实体卡槽上!美版iP... 6月2日消息,日前,又有博主提前把还没发布的iPhone 18 Pro电池参数给曝光了出来,根据爆料...
2026年618手机购机攻略:... 每年618都是手机换新的最佳窗口期,2026年大促叠加数码国补、平台满减、品牌降价三重福利,新机老机...
8.99万打穿底价!史无前例的... 科技的浪漫,不是将人类送入遥不可及的星辰,而是把曾经高不可攀的科幻,变成柴米油盐里的“论斤卖”。 如...
苹果首款折叠屏iPhone U... PChome 6月2日消息,据知名数码博主@i冰宇宙最新曝光的机模谍照,苹果首款折叠屏手机(内部命名...
特朗普:美伊暂停谈判?假新闻! 当地时间6月2日,美国总统特朗普否认伊朗已停止与调解方沟通,称美国与伊朗之间的对话一直在持续进行。特...
白宫记协晚宴改期至7月,特朗普... 当地时间6月2日,美国总统特朗普表示,此前因突发暴力事件而中断的白宫记协晚宴已确定改期至7月24日举...
特朗普,开掉“不听话的情报头子... 苦撑15个月后,美国国家情报总监加巴德即将离开白宫。她对辞职的解释是:丈夫近期确诊罕见骨癌,自己希望...
面粉染头模仿黄仁勋,农村青年走... “先赔偿5000元肖像侵权使用费,再删除所有视频,要么就起诉你。”这是黄仁勋的模仿者杨洋最近频繁收到...
驻马店非遗“大集”开张 把网络... 顶端新闻记者 王丹/文 李思翰 胡楚昊/图红纸剪出“抵制网络谣言”,糖画写下“不信谣不传谣”……5月...
A股三大指数集体收涨 贵金属涨...   A股三大指数集体收涨 贵金属涨幅居前  【A股三大指数集体收涨 贵金属涨幅居前】6月2日,A股三...