GET and POST requests
import requests

url = 'https://www.baidu.com/'
response = requests.get(url)   # GET request
print(response.status_code)
p = requests.post(url)         # POST request to the same URL
print(p.status_code)
200
302
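The GET returns 200, while the POST comes back with a 302 redirect status. As an aside not in the original notes, a minimal sketch using standard requests features (allow_redirects and response.history) shows how that redirect can be inspected:

import requests

url = 'https://www.baidu.com/'

# Ask requests not to follow redirects, so the raw 3xx response is visible.
r = requests.post(url, allow_redirects=False)
print(r.status_code)                  # e.g. 302
print(r.headers.get('Location'))      # redirect target, if the server sent one

# With redirects followed (the default), response.history lists the hops taken.
r = requests.post(url)
print([h.status_code for h in r.history], r.status_code)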
With the network disconnected, the same call returns:
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
140 conn = connection.create_connection(
--> 141 (self.host, self.port), self.timeout, **extra_kw)
142
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
59
---> 60 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
61 af, socktype, proto, canonname, sa = res
C:\ProgramData\Anaconda3\lib\socket.py in getaddrinfo(host, port, family, type, proto, flags)
744 addrlist = []
--> 745 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
746 af, socktype, proto, canonname, sa = res
gaierror: [Errno 11004] getaddrinfo failed
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
345 try:
--> 346 self._validate_conn(conn)
347 except (SocketTimeout, BaseSSLError) as e:
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
849 if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
--> 850 conn.connect()
851
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
283 # Add certificate verification
--> 284 conn = self._new_conn()
285
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
149 raise NewConnectionError(
--> 150 self, "Failed to establish a new connection: %s" % e)
151
NewConnectionError: <urllib3.connection.VerifiedHTTPSConnection object at 0x00000000085620F0>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
439 retries=self.max_retries,
--> 440 timeout=timeout
441 )
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
638 retries = retries.increment(method, url, error=e, _pool=self,
--> 639 _stacktrace=sys.exc_info()[2])
640 retries.sleep()
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
387 if new_retry.is_exhausted():
--> 388 raise MaxRetryError(_pool, url, error or ResponseError(cause))
389
MaxRetryError: HTTPSConnectionPool(host='www.baidu.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x00000000085620F0>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-11-6afebbbf6d3c> in <module>()
1 import requests
2 url = 'https://www.baidu.com/'
----> 3 response = requests.get(url)
4 print(response.status_code)
5 p=requests.post(url)
C:\ProgramData\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
70
71 kwargs.setdefault('allow_redirects', True)
---> 72 return request('get', url, params=params, **kwargs)
73
74
C:\ProgramData\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
56 # cases, and look like a memory leak in others.
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
60
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
506 }
507 send_kwargs.update(settings)
--> 508 resp = self.send(prep, **send_kwargs)
509
510 return resp
C:\ProgramData\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
616
617 # Send the request
--> 618 r = adapter.send(request, **kwargs)
619
620 # Total elapsed time of the request (approximately)
C:\ProgramData\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
506 raise SSLError(e, request=request)
507
--> 508 raise ConnectionError(e, request=request)
509
510 except ClosedPoolError as e:
ConnectionError: HTTPSConnectionPool(host='www.baidu.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x00000000085620F0>: Failed to establish a new connection: [Errno 11004] getaddrinfo failed',))
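As the traceback shows, a dropped network surfaces as requests.exceptions.ConnectionError (wrapping urllib3's MaxRetryError), so it is worth guarding requests calls. A minimal sketch, assuming a 5-second timeout is acceptable:

import requests

url = 'https://www.baidu.com/'
try:
    # A timeout keeps the call from hanging; 5 seconds is an arbitrary choice.
    response = requests.get(url, timeout=5)
    print(response.status_code)
except requests.exceptions.ConnectionError as e:
    print('network unreachable:', e)
except requests.exceptions.Timeout:
    print('request timed out')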
Douban Movie Top 250: https://movie.douban.com/top250
import requests
import re
import csv

# Matching Chinese strings: https://blog.csdn.net/bmjhappy/article/details/80512917

def movie_info(url):
    headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"
    }
    res = requests.get(url, headers=headers)
    ranks = re.findall(' <em class="">(.*?)</em>', res.text, re.S)
    # [\u4e00-\u9fa5] matches Chinese characters
    names = re.findall('<span class="title">([\u4e00-\u9fa5]+)</span>', res.text, re.S)
    countries = re.findall(' / ([\u4e00-\u9fa5]+) / ', res.text, re.S)
    text = re.sub('导演: ', "", res.text)  # strip the "导演: " prefix (note: the colon is Chinese punctuation)
    # two capture groups, so each "director" below is a (name, year) tuple
    directors = re.findall('<p class="">.*?: (.*?) .*? (\d+).*?</p>', text, re.S)
    scores = re.findall('<span class="rating_num" property="v:average">(.*?)</span>', res.text, re.S)
    for rank, name, country, director, score in zip(ranks, names, countries, directors, scores):
        writer.writerow([rank, name, country, director, score])

if __name__ == '__main__':
    file = open('./movie.csv', 'w+', encoding='utf-8', newline='')
    writer = csv.writer(file)
    writer.writerow(['rank', 'name', 'country', 'director', 'score'])
    for i in range(0, 250, 25):
        # crawl each page by following the URL pattern
        url = 'https://movie.douban.com/top250?start={}&filter='.format(i)
        if i == 200:
            print(i)  # debug print: confirm the loop reaches the last page
        movie_info(url)
    file.close()
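To see what the regular expressions above actually extract, here is a self-contained sketch; the HTML fragment is invented, only modeled on the shape of a Top 250 entry, so treat it as an illustration rather than real page source:

import re

# Invented fragment shaped like one Top 250 entry (not real Douban markup).
html = '''
<em class="">1</em>
<span class="title">肖申克的救赎</span>
<span class="title">&nbsp;/&nbsp;The Shawshank Redemption</span>
<span class="rating_num" property="v:average">9.7</span>
'''

ranks = re.findall('<em class="">(.*?)</em>', html, re.S)
# [\u4e00-\u9fa5]+ keeps only the Chinese title; the English alias span does not match
names = re.findall('<span class="title">([\u4e00-\u9fa5]+)</span>', html, re.S)
scores = re.findall('<span class="rating_num" property="v:average">(.*?)</span>', html, re.S)
print(ranks, names, scores)   # ['1'] ['肖申克的救赎'] ['9.7']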
Reference: https://desmonday.github.io/2019/03/02/python爬虫学习-day2正则表达式/