利用python进行数据爬取_用Python实现数据的爬取

import json
import time
from os import path

import numpy as np
import pymysql
import requests
from bs4 import BeautifulSoup

# Scrape the per-province / per-city COVID-19 counters published on the DXY
# dashboard page and load them into a local MySQL database.

url = 'https://ncov.dxy.cn/ncovh5/view/pneumonia?from=timeline&isappinstalled=0'  # request URL

# Request headers: a desktop-browser user agent is sent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
}

# Upper bound (seconds) for the HTTP request so a stalled server cannot hang
# the script forever.
REQUEST_TIMEOUT = 30

# NOTE(review): the script truncates `info3` but inserts into `info2`; the
# statements are kept exactly as they were -- confirm which table is intended.
sql_clean_province = "TRUNCATE TABLE info3"
sql = "INSERT INTO info2 values (%s,%s,%s,%s,%s,%s,%s,%s,%s)"


def fetch_page(page_url=url, request_headers=None):
    """Download the dashboard page and return its body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(
        page_url,
        headers=headers if request_headers is None else request_headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.content.decode('utf-8')


def parse_area_stats(script_text):
    """Return the JSON array embedded in the text of the ``getAreaStat`` script.

    The array is decoded starting at the first ``[`` and whatever follows the
    array is ignored, instead of slicing the text at fixed character offsets.
    Presumably the payload is a JavaScript assignment wrapping the array
    (TODO confirm against the live page).

    Raises:
        ValueError: if the text contains no ``[`` or the array is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    start = script_text.find('[')
    if start == -1:
        raise ValueError('no JSON array found in getAreaStat script')
    stats, _end = json.JSONDecoder().raw_decode(script_text, start)
    return stats


def extract_area_stats(html):
    """Locate ``<script id="getAreaStat">`` in *html* and decode its payload.

    Raises:
        ValueError: if the script tag is missing or holds no JSON array.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tag = soup.find(name='script', attrs={"id": "getAreaStat"})
    if tag is None:
        raise ValueError('script#getAreaStat not found in page')
    return parse_area_stats(tag.string or '')


def build_rows(areas, timestamp):
    """Turn the decoded area list into insertable province and city rows.

    Args:
        areas: list of province dicts; each may carry a ``cities`` list.
        timestamp: string stored in every row, so one run shares one
            snapshot time.

    Returns:
        ``(province_rows, city_rows)``; both are lists of 9-tuples
        ``(id, timestamp, provinceShortName, cityName, confirmedCount,
        suspectedCount, curedCount, deadCount, locationId)``. Province rows
        use ``None`` for the city name. Province ids run from 1; city ids
        continue after the last province id.
    """
    province_rows = []
    city_rows = []
    city_id = len(areas)
    for province_id, area in enumerate(areas, start=1):
        short_name = area.get('provinceShortName')
        province_rows.append((
            province_id, timestamp, short_name, None,
            area.get('confirmedCount'), area.get('suspectedCount'),
            area.get('curedCount'), area.get('deadCount'),
            area.get('locationId'),
        ))
        # A missing or null `cities` entry is treated as "no cities".
        for city in area.get('cities') or []:
            city_id += 1
            city_rows.append((
                city_id, timestamp, short_name, city.get('cityName'),
                city.get('confirmedCount'), city.get('suspectedCount'),
                city.get('curedCount'), city.get('deadCount'),
                city.get('locationId'),
            ))
    return province_rows, city_rows


def run_step(db, failure_message, action):
    """Run one database step and commit it.

    On a MySQL error the message and the error are printed and the
    transaction is rolled back, so the following steps can still run.

    Returns:
        True when the step was committed, False when it was rolled back.
    """
    try:
        action()
        db.commit()
    except pymysql.MySQLError as exc:
        print(failure_message, exc)
        db.rollback()
        return False
    return True


def main():
    """Fetch the page, build the rows and write them to MySQL."""
    areas = extract_area_stats(fetch_page())
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    province_rows, city_rows = build_rows(areas, timestamp)

    # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="******",
        database="yiqing",
        charset='utf8',
    )
    try:
        with db.cursor() as cursor:
            run_step(db, '执行失败,进入回调1',
                     lambda: cursor.execute(sql_clean_province))
            run_step(db, '执行失败,进入回调3',
                     lambda: cursor.executemany(sql, province_rows))
            run_step(db, '执行失败,进入回调4',
                     lambda: cursor.executemany(sql, city_rows))
    finally:
        # Always release the connection, even if a step raised unexpectedly.
        db.close()


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值