import datetime
import pandas as pd
import xlsxwriter as xlw
import requests
from bs4 import BeautifulSoup as bs
import re
import time
def dateRange(start, end):
    """Return sorted unique 'YYYYMM' strings for every month from
    *start* (inclusive) up to *end* (exclusive).

    Both arguments are 'YYYY-MM' strings. Returns [] when end <= start,
    matching the original day-enumeration behavior.
    """
    start_dt = datetime.datetime.strptime(start, "%Y-%m")
    end_dt = datetime.datetime.strptime(end, "%Y-%m")
    # Step month-by-month: the original generated one string per DAY in
    # the interval and then deduplicated, doing O(days) work for an
    # O(months) result.
    months = []
    year, month = start_dt.year, start_dt.month
    while (year, month) < (end_dt.year, end_dt.month):
        months.append('%04d%02d' % (year, month))
        if month == 12:
            year, month = year + 1, 1
        else:
            month += 1
    return months
def dateRange1(start, end):
    """Return the sorted unique 'YYYYMM' month strings covered by the
    daily pandas date range from *start* to *end* (end inclusive)."""
    months = {day.strftime('%Y%m') for day in pd.date_range(start=start, end=end)}
    return sorted(months)
def list_to_excel(weather_result, filename):
    """Write scraped weather rows to E:\\<filename>.xlsx.

    Each entry of *weather_result* is a multi-line string whose lines
    1-5 hold the per-day fields produced by getCommentsById (line 0 is
    the leading blank left by the tag stripping).

    NOTE(review): the header declares 6 columns but only 5 data fields
    are written per row, so the last header column stays empty —
    confirm against the scraped page layout.
    """
    workbook = xlw.Workbook(r'E:\%s.xlsx' % filename)
    sheet = workbook.add_worksheet('weather_report')
    title = ['日期', '最高气温', '最低气温', '天气', '风向', '风力']
    # Create the bold format once; the original built a new format
    # object for every header cell.
    bold = workbook.add_format({'bold': True})
    for col, caption in enumerate(title):
        sheet.write_string(0, col, caption, bold)
    # Debug prints removed: they raised IndexError on an empty result
    # list and only echoed data already written to the sheet.
    for row, record in enumerate(weather_result, start=1):
        fields = record.splitlines()
        for col in range(1, 6):
            sheet.write_string(row, col - 1, fields[col])
    workbook.close()
def getCommentsById(city, start, end):
    """Scrape lishi.tianqi.com monthly history pages for *city*.

    *start*/*end* are 'YYYY-MM' strings; months are fetched from start
    (inclusive) up to end (exclusive), matching dateRange. Returns a
    list of strings, one per <li> in each page's .thrui block, with
    HTML tags stripped.
    """
    weather_result = []
    # Loop-invariant objects hoisted out of the per-month loop: the
    # original rebuilt the headers dict every month and recompiled the
    # regex for every <li>.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
    tag_re = re.compile(r'<[^>]+>', re.S)  # strips any residual HTML tags
    for month in dateRange(start, end):
        url = 'http://lishi.tianqi.com/' + city + '/' + month + '.html'
        print(month)  # progress indicator, one line per fetched month
        # timeout added so a stalled connection cannot hang the scrape
        response = requests.get(url, headers=headers, timeout=30)
        soup = bs(response.text, 'html.parser')
        weather_m = soup.select('.thrui')
        for li in weather_m[0].find_all('li'):
            # original genexpr variable shadowed the builtin `id`
            joined = " ".join('%s' % piece for piece in li)
            weather_result.append(tag_re.sub('', joined))
    return weather_result
# Script entry point: scrape Chongqing's weather history for
# 2010-01..2021-01 and dump it to an Excel workbook on E:\.
# Performs network and file I/O when the module is run directly.
if __name__ == '__main__':
    data = getCommentsById('chongqing', '2010-01', '2021-01')
    list_to_excel(data, '重庆python天气')
```python
# Demo: scrape the cntour.cn front page and print each news link's
# title, href, and numeric ID extracted from the href.
import re

import requests
from bs4 import BeautifulSoup

url = 'http://www.cntour.cn/'
strhtml = requests.get(url)
soup = BeautifulSoup(strhtml.text, 'lxml')
data = soup.select('#main>div>div.mtop.firstMod.clearfix>div.centerBox>ul.newsList>li>a')
print(data)
print(soup.p.string)
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
# BUG FIX: the original called undefined name `request.get` (NameError);
# the module is imported as `requests`.
response = requests.get(url, headers=headers)
for item in data:
    result = {
        'title': item.get_text(),
        'link': item.get('href')
    }
    print(result)
for item in data:
    result = {
        "title": item.get_text(),
        "link": item.get('href'),
        # raw string fixes the invalid '\d' escape (SyntaxWarning on 3.12+)
        'ID': re.findall(r'\d+', item.get('href'))
    }
    print(result)
import requests
import json
def get_translate_date(word=None):
    """Post *word* to the Youdao web-translate endpoint, print the full
    response and the translation, and return the translated string.

    NOTE(review): salt/sign/ts/bv are hard-coded values captured from
    one browser session; the server may reject replayed signatures —
    confirm the endpoint still accepts them before relying on this.
    """
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
    form_data = {  # PEP 8 name; original used `From_data`
        'i': word,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '16147623573022',
        'sign': '48d7cce186aa05fbe627ec746da39358',
        'ts': '1614762357302',
        'bv': '19413bb132e864b42a71e17c0a92015a',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTIME',
        'typoResult': 'false',
    }
    # timeout so a stalled request cannot hang; response.json() replaces
    # the equivalent json.loads(response.text)
    response = requests.post(url, data=form_data, timeout=30)
    content = response.json()
    print(content)
    tgt = content['translateResult'][0][0]['tgt']
    print(tgt)
    return tgt
# Demo call: translate a Chinese sentence when run as a script
# (performs a network request on module run).
if __name__=='__main__':
    get_translate_date('我爱中国')
# Demo: fetch one month of Chongqing weather history and save the raw
# HTML to E:\c.txt.
import requests

url = 'https://lishi.tianqi.com/chongqing/201901.html'
strhtml = requests.get(url)
print(strhtml.text)
# BUG FIX: the original opened the file and never closed it; `with`
# guarantees the handle is flushed and closed even on error. The raw
# string keeps the Windows path backslash unambiguous.
with open(r'E:\c.txt', 'w', encoding='utf-8') as file:
    file.write(strhtml.text)