1、读取 HTML 内容(GET 方式传参)
import urllib
import urllib.request
import urllib.parse
import re
# Fetch a page with a GET request and print its body.
def queryUrl(number):
    """Fetch the test page for ``number`` via GET and print the response body.

    ``number`` is converted with ``str()`` and appended directly after the
    ``?`` of the request URL (a bare query string without a key).

    Returns:
        The literal string "成功" once the body has been read, decoded as
        UTF-8 and printed.

    Errors raised by ``urlopen`` or by the UTF-8 decoding are not handled
    here and propagate to the caller.
    """
    url = "http://www.test.com/timerbin/test?" + str(number)
    # The context manager closes the HTTP response (and its socket) even when
    # read()/decode() raises; previously the response was never closed.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf-8')
    print(html)
    return "成功"
# Remove link/emphasis markup and filler characters from a text fragment.
def trims(str):
    """Return ``str`` with markup remnants, quotes and spaces removed.

    The following substrings are deleted, in this order: ``target="_blank"``,
    ``<em>``, ``</em>``, ``</a>``, ``>``, ``"`` and the space character.  The
    result is then passed through :func:`trim`.

    The parameter keeps its original name (which shadows the builtin inside
    this function only) so existing keyword callers keep working.
    """
    text = str
    # str.strip(chars) treats its argument as a *set of characters* and would
    # also eat leading/trailing letters of real content (e.g. 'apple' -> 'pp'),
    # so the attribute is removed as a whole substring instead.
    # NOTE(review): the original chained a second, identical replace(' ', '');
    # if it was meant to target another whitespace character (for example a
    # non-breaking space), add that character to this tuple -- TODO confirm.
    for token in ('target="_blank"', '<em>', '</em>', '</a>', '>', '"', ' '):
        text = text.replace(token, '')
    return trim(text)


# Remove line breaks and surrounding whitespace.
def trim(str):
    """Return ``str`` without newline characters and without leading/trailing whitespace."""
    return str.replace('\n', '').strip()
# Run the GET example with the sample number 415528 and print the string
# that queryUrl returns.
myUrl = queryUrl(415528)
print(myUrl)
2、多线程循环输出
import re
import os
import time
import threading
import threadpool
# Thread worker used for the multithreading demo: prints a name repeatedly.
def queryTestLoop(filName, start, end, interval=1):
    """Print ``filName`` once per step while counting from ``start`` up to ``end``.

    The loop runs while the counter is below ``end``; the counter starts at
    ``start`` and grows by one per iteration, so integer arguments produce
    ``end - start`` lines (none when ``start >= end``).

    Args:
        filName: Value to print on every iteration (converted with ``str()``).
        start: Initial counter value.
        end: Exclusive upper bound of the counter.
        interval: Seconds to sleep after each print; defaults to 1.

    Failures while printing or sleeping are reported as ``error`` and the
    loop continues with the next step.
    """
    current = start
    while current < end:
        # Advance the counter outside the try block: if the increment itself
        # fails, the error propagates instead of looping forever on 'error'.
        current = current + 1
        try:
            print(str(filName))
            time.sleep(interval)
        except Exception:
            # Exception (not BaseException) so KeyboardInterrupt and
            # SystemExit can still stop the loop.
            print('error')
# Run queryTestLoop on a separate thread with filName='timerbin', start=1
# and end=10, and start it immediately.
t0 = threading.Thread(target=queryTestLoop, args=('timerbin',1,10))
t0.start()
3、读取请求URL,写入到文件中
import requests
import json
import os
# Destination file for the downloaded response text.
path = 'E:\\log\\python.txt'
# URL of the address-information endpoint.
url = "http://www.test.com/timerbin/test?name=timerbin"
# Request the URL and take the response body as text.  This happens before
# the output file is touched, so a failed request leaves any previous file
# in place.
addrData = requests.get(url).text
# Mode 'w' creates the file or replaces an existing one, which covers the
# former "delete if present, then append" sequence in a single step.  The
# explicit encoding avoids depending on the platform default, and the context
# manager closes the file even if write() raises.
with open(path, 'w', encoding='utf-8') as file:
    file.write(addrData)
print("成功")
4、以 POST 方式请求 URL
import json
import os
import random
import shutil
import time
import socket
import requests
import requests.packages
def post_url(url, cityId):
    """POST a JSON city filter to ``url`` and return the decoded JSON reply.

    The request body is ``{"attributes": ["cityId_<cityId>"], "sortOrder":
    "desc"}`` serialized with ``json.dumps``.

    Args:
        url: Address the POST request is sent to.
        cityId: City identifier; converted with ``str()`` so both strings and
            integers are accepted.

    Returns:
        The response body parsed with ``json.loads``.  It was previously
        parsed and then discarded, so the function returned ``None``.

    Raises:
        ValueError: from ``json.loads`` when the response body is not valid
            JSON.  Errors raised by ``requests.post`` propagate unchanged.
    """
    # Request headers.
    headers = {
        'Host': 'www.test.com',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/json',
        'Referer': 'https://www.test.com/',
        # No hard-coded Content-Length: requests derives it from the actual
        # body, so the former fixed value of 148 was overridden anyway.
        'Origin': 'https://www.test.com',
        'Connection': 'close',
        # NOTE(review): `ie` is not defined in the visible part of this file;
        # presumably a module-level User-Agent string defined elsewhere --
        # otherwise this line raises NameError. TODO confirm.
        'User-Agent': ie,
    }
    # Request parameters.
    payload = {'attributes': ['cityId_' + str(cityId)], 'sortOrder': 'desc'}
    body = json.dumps(payload)
    # Send the POST request.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as in the original -- confirm this is intended.
    req = requests.post(url, data=body, headers=headers, verify=False)
    da = json.loads(req.text)
    # Sleep for a random fraction of a second before returning.
    time.sleep(random.random())
    return da
# Example invocation: send the POST request to the test URL with cityId '1235'.
post_url('http://www.timerbin.com/url/test','1235')