python 爬取国家粮食局东北地区玉米收购价格监测信息

最新推荐文章于 2022-12-30 12:28:29 发布

degoumo1921

最新推荐文章于 2022-12-30 12:28:29 发布

阅读量249

点赞数

文章标签： python runtime

原文链接：http://www.cnblogs.com/wangjunhaoBlog/p/8080099.html

版权

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import sys
import time
import urllib
import urllib.request
from datetime import datetime

def get_html(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page content, or the literal string "Error" when the
        server answers with an HTTP error status (the error is printed).

    Raises:
        urllib.error.URLError: Non-HTTP failures are not caught here and
            propagate to the caller.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = urllib.request.Request(url)
    try:
        # The context manager closes the response (and its socket) even
        # when read()/decode() raises.
        with urllib.request.urlopen(request) as response:
            return response.read().decode('UTF-8')
    except urllib.error.HTTPError as e:
        print(e)
        return "Error"

def cbk(a, b, c):
    """Print the current download progress on a single, rewritten line.

    The signature matches the ``reporthook`` callback of
    ``urllib.request.urlretrieve``.

    Args:
        a: Number of blocks transferred so far.
        b: Size of one block in bytes.
        c: Total size in bytes; zero or negative when it is not known.
    """
    if c <= 0:
        # Without a usable total a percentage is meaningless: it would be
        # negative for c == -1 and raise ZeroDivisionError for c == 0, so
        # report the number of bytes received instead.
        sys.stdout.write("{0} bytes\r".format(a * b))
    else:
        percent = 100.00 * a * b / c
        # The last block is usually only partly filled, so a * b can
        # overshoot the real size; clamp the display to 100.
        if percent > 100:
            percent = 100
        sys.stdout.write("{0} %\r".format(round(percent, 2)))
    sys.stdout.flush()

def get_image(WebPageContent, re_rule, output_path,
              base_url="http://www.chinagrain.gov.cn/"):
    """Download the last image referenced in a page to *output_path*.

    Every match of *re_rule* in *WebPageContent* is treated as a partial
    image path. Only the last match is downloaded, because there is a
    single *output_path* to write to.

    Args:
        WebPageContent: HTML text to search.
        re_rule: Regular expression whose match yields the image path
            relative to *base_url*, without the ``.jpg`` extension.
        output_path: File name handed to ``urlretrieve`` as the download
            destination.
        base_url: Site root prepended to the matched path.

    Returns:
        None. When nothing matches, a message is printed and no download
        is attempted.
    """
    image_url_list = re.findall(re.compile(re_rule), WebPageContent)
    if not image_url_list:
        # Without this guard the URL variable would never be bound and the
        # code below would fail with UnboundLocalError.
        print("No image matched rule: {0}".format(re_rule))
        return

    image_url = "%s%s.jpg" % (base_url, image_url_list[-1])
    print(image_url)
    try:
        urllib.request.urlretrieve(url=image_url, filename=output_path, reporthook=cbk)
    except IsADirectoryError as e:
        # output_path names a directory instead of a file; report and go on.
        print(e)



if __name__ == '__main__':
    # Script configuration: page to scrape, pattern locating the image path
    # inside it, and the download destination.
    url = "http://www.chinagrain.gov.cn/n787423/c1163380/content.html"
    re_rule = r'src="../../(.+?).jpg"'
    # NOTE(review): an empty file name appears to make urlretrieve fall back
    # to a temporary file -- confirm this is the intended destination.
    output_path = ""

    starttime = datetime.now()
    WebPageContent = get_html(url=url)
    if WebPageContent == "Error":
        # get_html returns this sentinel after an HTTP error; there is no
        # page to search, so skip the image download.
        print("Page could not be fetched, image download skipped")
    else:
        get_image(WebPageContent=WebPageContent, re_rule=re_rule, output_path=output_path)
    endtime = datetime.now()
    # total_seconds() covers the whole interval; the .seconds attribute
    # alone drops whole days. Truncated to int to keep the output format.
    print("Runtime is: %s s" % int((endtime - starttime).total_seconds()))

转载于:https://www.cnblogs.com/wangjunhaoBlog/p/8080099.html

degoumo1921

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
python 爬取国家粮食局东北地区玉米收购价格监测信息

#!/usr/bin/python # -*- coding: UTF-8 -*- import re import sys import time import urllib import urllib.request from datetime import datetime def get_html(url): try: request = urllib.request.Req...
复制链接

扫一扫