比如说,要把数据保存到 Excel,只需执行一条命令 pip install openpyxl,再加几行代码就搞定了。
以下代码爬取糯米的部分数据,用基本的正则匹配来当作练手。
#-*- coding:utf-8 -*-
import re
import requests
import json
import random
from time import sleep
from openpyxl import Workbook
# Create a new openpyxl workbook and take its active worksheet; the crawl
# loop below appends its rows to `ws`.
wb = Workbook()
ws = wb.active
class JSONObject:
    """Expose the keys of a dict as attributes of an object.

    The mapping passed to the constructor is installed directly as the
    instance's ``__dict__`` (it is not copied), so every key becomes
    readable as ``obj.key``.
    """

    def __init__(self, d):
        """Adopt *d* as this instance's attribute dictionary.

        Args:
            d: Dict mapping attribute names to values; assigned as-is,
                without copying.
        """
        self.__dict__ = d
# Banner printed once, before the page-by-page crawl loop begins.
print("---------------------start----------------------")
#
# with open('meishiall.txt', 'wt') as f:
for j in range(1,49):
try:
if j == 1:
url = "https://nc.nuomi.com/326"
else:
url = "https://nc.nuomi.com/326-page"+str(j)+"?#j-sort-bar"
print("-------------------url=" + url + "--------------------")
# print("-------------------url="+url+"--------------------",file=f)
ws.append([url])
r = requests.get(url)
html= r.content.decode("utf8", "ignore")
# print(html)
pic_url = re.findall('
<="" code="">