'''
@author:zl
@contact:
@site: https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,2.html
'''
# _*_ coding:utf-8 _*_
import requests
from bs4 import BeautifulSoup
import re
import time
from pymongo import MongoClient
import xlwt
import json
# Browser-like HTTP request headers.
# NOTE: none of the requests.get() calls visible in this file pass this dict,
# and the Host entry names search.51job.com rather than a host that is
# actually requested here.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/67.0.3396.87 Safari/537.36"
    ),
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    "upgrade-insecure-requests": "1",
    "Connection": "keep-alive",
    "Host": "search.51job.com",
}
# Fetch the raw chart data
def get_content(start=0, limit=300, timeout=10):
    """Download one page of the Douban movie chart and return the decoded JSON.

    Args:
        start: Offset of the first entry to request (sent as ``start``).
        limit: Maximum number of entries to request (sent as ``limit``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    query = {'action': '', 'start': start, 'limit': limit}
    # Certificate verification is left at the requests default (enabled) so
    # an intercepted connection is rejected instead of silently accepted.
    response = requests.get(
        "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90",
        params=query,
        timeout=timeout,
    )
    # Surface an HTTP error as such instead of a JSON decoding failure on
    # the error page.
    response.raise_for_status()
    return response.json()
# Extract the fields of interest
def get(jsondata):
    """Reduce every chart entry to the twelve fields that are exported.

    Args:
        jsondata: Iterable of mappings, each containing at least the keys
            listed in ``fields`` below. Extra keys are ignored.

    Returns:
        A new list with one dict per entry. Keys are inserted in the order
        of ``fields``, which is the column order excel_write() relies on.

    Raises:
        KeyError: If an entry lacks one of the fields.
    """
    fields = (
        'rank',
        'cover_url',
        'id',
        'types',
        'regions',
        'title',
        'url',
        'release_date',
        'actor_count',
        'vote_count',
        'score',
        'actors',
    )
    return [{name: entry[name] for name in fields} for entry in jsondata]
# Write the scraped entries into the Excel sheet
def excel_write(items, sheet=None):
    """Write each item into the worksheet row selected by its ``rank``.

    Args:
        items: Iterable of dicts. Every non-empty dict must contain a
            ``rank`` key whose value is used as the row index; its values
            are written left to right in the dict's iteration order.
        sheet: Object exposing ``write(row, col, value)``. When omitted, the
            module-level worksheet ``ws`` is used.

    Raises:
        KeyError: If a non-empty item has no ``rank`` key.
    """
    if sheet is None:
        sheet = ws  # worksheet created by the script entry point
    for item in items:
        if not item:
            continue  # nothing to write for an empty record
        row = item['rank']  # same row for every cell of this item
        for col, value in enumerate(item.values()):
            if isinstance(value, (list, tuple)):
                # NOTE(review): xlwt appears to interpret a list/tuple as
                # rich-text segments and run them together; join explicitly
                # so the entries stay separated - confirm against xlwt.
                value = ", ".join(map(str, value))
            sheet.write(row, col, value)  # row, column, data
if __name__ == '__main__':
    newTable = "test2.xls"  # output workbook file name
    wb = xlwt.Workbook(encoding='utf-8')  # new workbook with the encoding declared
    # Module-level worksheet; excel_write() writes into this global.
    ws = wb.add_sheet('sheet1', cell_overwrite_ok=True)
    # Column titles, in the same order as the fields produced by get().
    headData = ['rank', 'cover_url', 'id', 'types', 'regions', 'title', 'url',
                'release_date', 'actor_count', 'vote_count', 'score', 'actors']
    # Build the bold style once and reuse it for every header cell.
    header_style = xlwt.easyxf('font: bold on')
    for colnum, title in enumerate(headData):
        ws.write(0, colnum, title, header_style)
    excel_write(get(get_content()))
    wb.save(newTable)
# Parse JSON: two equivalent ways of decoding a response body
import requests
import json

# The timeout keeps this module-level request from blocking indefinitely.
response = requests.get('http://httpbin.org/get', timeout=10)
res1 = json.loads(response.text)  # manual decoding: more cumbersome
res2 = response.json()  # decode the JSON body directly
print(res1 == res2)  # True
'''
@author:zl
@contact:
@site: https://search.51job.com/list/000000,000000,0000,00,9,99,python,2,2.html
'''
# _*_ coding:utf-8 _*_
import requests
from bs4 import BeautifulSoup
import re
import time
from pymongo import MongoClient
import xlwt
import json
# Browser-like HTTP request headers.
# NOTE: this repeats the assignment of the same name earlier in the file.
# None of the requests.get() calls visible in this file pass this dict, and
# the Host entry names search.51job.com rather than a host that is actually
# requested here.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/67.0.3396.87 Safari/537.36"
    ),
    "accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    "upgrade-insecure-requests": "1",
    "Connection": "keep-alive",
    "Host": "search.51job.com",
}
# Fetch the raw chart data
# NOTE: repeats the definition of the same name earlier in this file; this
# later definition is the one left bound once the module has been executed.
def get_content(start=0, limit=300, timeout=10):
    """Download one page of the Douban movie chart and return the decoded JSON.

    Args:
        start: Offset of the first entry to request (sent as ``start``).
        limit: Maximum number of entries to request (sent as ``limit``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``Response.json()`` decodes from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    query = {'action': '', 'start': start, 'limit': limit}
    # Certificate verification is left at the requests default (enabled) so
    # an intercepted connection is rejected instead of silently accepted.
    response = requests.get(
        "https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90",
        params=query,
        timeout=timeout,
    )
    # Surface an HTTP error as such instead of a JSON decoding failure on
    # the error page.
    response.raise_for_status()
    return response.json()
# Extract the fields of interest
# NOTE: repeats the definition of the same name earlier in this file; this
# later definition is the one left bound once the module has been executed.
def get(jsondata):
    """Reduce every chart entry to the twelve fields that are exported.

    Args:
        jsondata: Iterable of mappings, each containing at least the keys
            listed in ``fields`` below. Extra keys are ignored.

    Returns:
        A new list with one dict per entry. Keys are inserted in the order
        of ``fields``, which is the column order excel_write() relies on.

    Raises:
        KeyError: If an entry lacks one of the fields.
    """
    fields = (
        'rank',
        'cover_url',
        'id',
        'types',
        'regions',
        'title',
        'url',
        'release_date',
        'actor_count',
        'vote_count',
        'score',
        'actors',
    )
    return [{name: entry[name] for name in fields} for entry in jsondata]
# Write the scraped entries into the Excel sheet
# NOTE: repeats the definition of the same name earlier in this file; this
# later definition is the one left bound once the module has been executed.
def excel_write(items, sheet=None):
    """Write each item into the worksheet row selected by its ``rank``.

    Args:
        items: Iterable of dicts. Every non-empty dict must contain a
            ``rank`` key whose value is used as the row index; its values
            are written left to right in the dict's iteration order.
        sheet: Object exposing ``write(row, col, value)``. When omitted, the
            module-level worksheet ``ws`` is used.

    Raises:
        KeyError: If a non-empty item has no ``rank`` key.
    """
    if sheet is None:
        sheet = ws  # worksheet created by the script entry point
    for item in items:
        if not item:
            continue  # nothing to write for an empty record
        row = item['rank']  # same row for every cell of this item
        for col, value in enumerate(item.values()):
            if isinstance(value, (list, tuple)):
                # NOTE(review): xlwt appears to interpret a list/tuple as
                # rich-text segments and run them together; join explicitly
                # so the entries stay separated - confirm against xlwt.
                value = ", ".join(map(str, value))
            sheet.write(row, col, value)  # row, column, data
# NOTE: repeats the entry-point block earlier in this file, so the export is
# performed a second time when the file is run as a script.
if __name__ == '__main__':
    newTable = "test2.xls"  # output workbook file name
    wb = xlwt.Workbook(encoding='utf-8')  # new workbook with the encoding declared
    # Module-level worksheet; excel_write() writes into this global.
    ws = wb.add_sheet('sheet1', cell_overwrite_ok=True)
    # Column titles, in the same order as the fields produced by get().
    headData = ['rank', 'cover_url', 'id', 'types', 'regions', 'title', 'url',
                'release_date', 'actor_count', 'vote_count', 'score', 'actors']
    # Build the bold style once and reuse it for every header cell.
    header_style = xlwt.easyxf('font: bold on')
    for colnum, title in enumerate(headData):
        ws.write(0, colnum, title, header_style)
    excel_write(get(get_content()))
    wb.save(newTable)