#coding=utf-8
from lxml import etree
import requests
import csv
import time
import sys
import codecs
# Python 2 workaround: reload sys and force the default string encoding to
# UTF-8; the accompanying write-up uses this to get rid of
# "UnicodeEncodeError: 'ascii' codec can't encode/decode characters".
reload(sys)
sys.setdefaultencoding('utf8')
def WriteCSV(Item):
    """Echo one scraped record to stdout and append it as a row to home.csv.

    Item is a sequence with at least three elements; the first three are
    printed and the whole sequence is written as one CSV row (Python 2 code).

    The UTF-8 BOM is written only while home.csv is still empty, so the file
    carries a single BOM at its start rather than one in front of every row.
    """
    print("%s %s %s" % (Item[0], Item[1], Item[2]))
    # Binary append mode: the Python 2 csv module writes its own line endings
    # and expects a file opened with the 'b' flag.
    with open('home.csv', 'ab') as f:
        f.seek(0, 2)  # 2 == os.SEEK_END, so tell() reports the current size
        if f.tell() == 0:
            f.write(codecs.BOM_UTF8)
        writer = csv.writer(f)
        # Encode unicode cells explicitly so the row is written as UTF-8
        # bytes without depending on the interpreter's default encoding.
        writer.writerow([
            cell.encode('utf-8') if isinstance(cell, unicode) else cell
            for cell in Item
        ])
#headers = {
#'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
#}
# Fetch pages n1..n4 under http://beijing.qfang.com/garden/ and hand every
# complete listing (name, address, price) to WriteCSV.
start_Url = 'http://beijing.qfang.com/garden/n'
for x in range(1, 5):
    url = start_Url + str(x)
    # A timeout keeps one stalled connection from hanging the whole run.
    html = requests.get(url, timeout=10)
    time.sleep(1)  # pause between page requests
    if html.status_code != 200:
        # Do not parse an error page as if it were a listing page.
        continue
    selector = etree.HTML(html.text)
    Lists = selector.xpath('//*[@id="cycleListings"]/ul/li')
    for List in Lists:
        # Full path of the address node, for reference:
        # //*[@id="cycleListings"]/ul/li[1]/div[1]/p[3]/span[1]
        fields = [
            List.xpath('div[1]/p[1]/a/text()'),
            List.xpath('div[1]/p[3]/span[1]/text()'),
            List.xpath('div[2]/p[1]/span[1]/text()'),
        ]
        if not all(fields):
            # xpath() returns an empty list when a node is missing; skip the
            # entry instead of crashing with IndexError on [0].
            continue
        Name, Address, Price = (matches[0] for matches in fields)
        Item = [Name, Address, Price]
        WriteCSV(Item)
本文的代码用 Python 编写。运行代码时遇到的问题是 UnicodeEncodeError: 'ascii' codec can't encode/decode characters,解决办法是把默认编码改为 utf-8:
import sys
# Python 2 workaround: reload sys and force the default string encoding to
# UTF-8 so the 'ascii' codec errors described above no longer occur.
reload(sys)
sys.setdefaultencoding('utf8')
#coding=utf-8
from lxml import etree
import requests
import csv
import time
import sys
import codecs
# Python 2 workaround: reload sys and force the default string encoding to
# UTF-8; the accompanying write-up uses this to get rid of
# "UnicodeEncodeError: 'ascii' codec can't encode/decode characters".
reload(sys)
sys.setdefaultencoding('utf8')
def WriteCSV(Item):
    """Echo one scraped record to stdout and append it as a row to home.csv.

    Item is a sequence with at least three elements; the first three are
    printed and the whole sequence is written as one CSV row (Python 2 code).

    The UTF-8 BOM is written only while home.csv is still empty, so the file
    carries a single BOM at its start rather than one in front of every row.
    """
    print("%s %s %s" % (Item[0], Item[1], Item[2]))
    # Binary append mode: the Python 2 csv module writes its own line endings
    # and expects a file opened with the 'b' flag.
    with open('home.csv', 'ab') as f:
        f.seek(0, 2)  # 2 == os.SEEK_END, so tell() reports the current size
        if f.tell() == 0:
            f.write(codecs.BOM_UTF8)
        writer = csv.writer(f)
        # Encode unicode cells explicitly so the row is written as UTF-8
        # bytes without depending on the interpreter's default encoding.
        writer.writerow([
            cell.encode('utf-8') if isinstance(cell, unicode) else cell
            for cell in Item
        ])
#headers = {
#'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
#}
# Fetch pages n1..n4 under http://beijing.qfang.com/garden/ and hand every
# complete listing (name, address, price) to WriteCSV.
start_Url = 'http://beijing.qfang.com/garden/n'
for x in range(1, 5):
    url = start_Url + str(x)
    # A timeout keeps one stalled connection from hanging the whole run.
    html = requests.get(url, timeout=10)
    time.sleep(1)  # pause between page requests
    if html.status_code != 200:
        # Do not parse an error page as if it were a listing page.
        continue
    selector = etree.HTML(html.text)
    Lists = selector.xpath('//*[@id="cycleListings"]/ul/li')
    for List in Lists:
        # Full path of the address node, for reference:
        # //*[@id="cycleListings"]/ul/li[1]/div[1]/p[3]/span[1]
        fields = [
            List.xpath('div[1]/p[1]/a/text()'),
            List.xpath('div[1]/p[3]/span[1]/text()'),
            List.xpath('div[2]/p[1]/span[1]/text()'),
        ]
        if not all(fields):
            # xpath() returns an empty list when a node is missing; skip the
            # entry instead of crashing with IndexError on [0].
            continue
        Name, Address, Price = (matches[0] for matches in fields)
        Item = [Name, Address, Price]
        WriteCSV(Item)
# Python 2 workaround: reload sys and force the default string encoding to
# UTF-8 so implicit str/unicode conversions stop failing with
# "'ascii' codec can't encode/decode characters".
import sys
reload(sys)
sys.setdefaultencoding('utf8')
写入 csv 文件时遇到了乱码问题,通过在文件开头写入 UTF-8 BOM 来解决:
# Append one row to home.csv. The UTF-8 BOM is written only while the file is
# still empty, so it appears once at the start instead of before every row.
# Binary mode is used because the Python 2 csv module expects the 'b' flag.
with open('home.csv', 'ab') as f:
    f.seek(0, 2)  # 2 == os.SEEK_END, so tell() reports the current size
    if f.tell() == 0:
        f.write(codecs.BOM_UTF8)
    writer = csv.writer(f)
    # Encode unicode cells explicitly so the row is written as UTF-8 bytes.
    writer.writerow([
        cell.encode('utf-8') if isinstance(cell, unicode) else cell
        for cell in Item
    ])