# Dump the raw page text so the fetched markup can be inspected by eye.
print(page_text)

# Parse the markup and select every <li> directly under
# <ul class="house-list-wrap">.
tree = etree.HTML(page_text)
li_list = tree.xpath('//ul[@class="house-list-wrap"]/li')

# For each item, print the text nodes of the link inside the <h2> of its
# second <div>.  xpath() returns a list, so a list is printed per item.
for li in li_list:
    text = li.xpath('./div[2]/h2/a/text()')
    print(text)

# NOTE(review): the original snippet lost its indentation; this marker is
# assumed to run once after the loop rather than once per item -- confirm.
print('over')
爬虫爬取数据时中文乱码
1 2 3 4 5 6
# Option 1: set the encoding on the response object before its text is read.
response = requests.get('http://pic.netbian.com/4kdongman/', headers=headers)
# response.encoding = 'utf-8'
# NOTE(review): the assignment above is disabled in the original; option 2
# decodes as GBK, so presumably UTF-8 did not match this page -- confirm.

# Option 2: repair each garbled string individually.  Encode it back to
# bytes with ISO-8859-1, then decode those bytes as GBK.
title = li.xpath('./a/img/@alt')[0]
latin1_bytes = title.encode('iso-8859-1')
title = latin1_bytes.decode('gbk')
# Select every <li> under <div class="slist">/<ul> and print the resulting
# element list for inspection.
li_list = tree.xpath('//div[@class="slist"]/ul/li')
print(li_list)

# Site root; presumably prepended to the relative image paths further on --
# that use is not visible in this excerpt.
url = 'http://pic.netbian.com'

for li in li_list:
    # The first match of each attribute is taken; an item without an
    # <a>/<img> would raise IndexError here.
    img_src = li.xpath('./a/img/@src')[0]
    title = li.xpath('./a/img/@alt')[0]

    # Repair the garbled alt text: encode back to bytes with ISO-8859-1,
    # then decode those bytes as GBK.
    raw_title = title.encode('iso-8859-1')
    title = raw_title.decode('gbk')
    # NOTE(review): img_src and title are not used within this excerpt; the
    # rest of the loop body, if any, lies outside the visible source.