python抓取网站数据并图形化显示(二)

今天主要进行代码解析,分析各个代码
整体源代码在此,懒得看解释的[直接下载]

一、nCov应用代码

settings.py
主要是设置数据库连接和static文件夹,添加epidemic应用,注意,要把static设置为resource root文件夹。

"""
Django settings for nCov project.

Generated by 'django-admin startproject' using Django 3.0.

For more information on this file, see
https://docs.djangoproject.com/en/3.0/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/3.0/ref/settings/
"""

import os

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# NOTE(review): this key is published together with the tutorial; generate a
# fresh one before deploying anywhere.
SECRET_KEY = 'rbigep3-z^npju$pl+e7v#mt9q)6n=!qaelt&wh75r99qop4i^'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

# Accepts requests for any Host header; acceptable for local experiments only.
ALLOWED_HOSTS = ['*']


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    # The project's own application.
    'epidemic',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'nCov.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        # Project-level template directory, searched in addition to app templates.
        'DIRS': [os.path.join(BASE_DIR, 'templates')],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'nCov.wsgi.application'


# Database
# https://docs.djangoproject.com/en/3.0/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.mysql',  # or use mysql.connector.django
        'NAME': 'ncov',
        'USER': 'root',
        'PASSWORD': '1234',
        'HOST': 'localhost',
        'PORT': '3306',
    }
}


# Password validation
# https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/3.0/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/3.0/howto/static-files/

STATIC_URL = '/static/'
# Target directory for `collectstatic`. The component must not start with a
# slash: os.path.join() drops BASE_DIR when a later component is absolute, so
# the previous '/static/' resolved to the filesystem root. It must also differ
# from every STATICFILES_DIRS entry, which Django's staticfiles check rejects.
STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles')
# Project-level source directory for static assets.
STATICFILES_DIRS = (os.path.join(BASE_DIR, 'static'),)

urls.py
这里主要设置引用epidemic下的urls.py文件

"""nCov URL Configuration

The `urlpatterns` list routes URLs to views. For more information please see:
    https://docs.djangoproject.com/en/3.0/topics/http/urls/
Examples:
Function views
    1. Add an import:  from my_app import views
    2. Add a URL to urlpatterns:  path('', views.home, name='home')
Class-based views
    1. Add an import:  from other_app.views import Home
    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.conf.urls import url
from django.conf.urls.static import static
from django.contrib import admin
from django.urls import include,path
from django.views.static import serve

from nCov import settings
#from nCov.settings import MEDIA_ROOT
from . import views


# Project-level route table: the admin site, two landing pages rendered by
# this package's views module, and everything under epidemic/ delegated to
# the epidemic application's own URLconf.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('tjg/', views.index),
    path('', views.index00),
    #url(r'^media/(?P<path>.*)$', serve, {'document_root': settings.MEDIA_ROOT}),
    path('epidemic/', include('epidemic.urls')),
]

views.py
这里主要设置一个首页链接

from django.shortcuts import render

def index(requset):
    """Render ``index.html`` with a fixed greeting under the ``hello`` key."""
    context = {'hello': 'how are you!'}
    return render(requset, 'index.html', context)

def index00(requset):
    """Render ``index00.html`` with a fixed greeting under the ``hello`` key."""
    context = {'hello': 'how are you!'}
    return render(requset, 'index00.html', context)

二、template代码

这里主要解析相关的源代码,多余部分就不分析了,感兴趣的可以查看源代码。
index00.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width,initial-scale=1.0">
    <title>冠状病毒疫情</title>
</head>
<body>
<table align="center">
    <tr>
        <td>
            这里是<h1>老朽</h1>的疫情图<br>

            在家没事干,研究下从卫计委抓取数据,而后以图表形式显示出来<br>
            点击 <a href="epidemic/show00"><h3>趋势显示</h3></a>显示疫情<br>
            昆明的形势不容乐观啊,只能憋家里了!!!
            点击 <a href="epidemic/yunnan"><h3>地图显示</h3></a>显示疫情<br>

            <!-- Management links start here -->
            点击 <a href="epidemic/insertvirus"><h3>插入数据</h3></a>显示疫情<br>
            点击 <a href="epidemic/deletetable"><h3>删除数据</h3></a>显示疫情<br>
            点击 <a href="epidemic/insertviruslink"><h3>添加链接</h3></a>显示疫情<br>
            点击 <a href="epidemic/checkdatabase"><h3>检查数据</h3></a>显示疫情<br>
            <!-- Management links end here -->
        </td>
    </tr>
</table>

</body>
</html>

show00.html

<!DOCTYPE HTML>
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <title>疫情显示</title>
        <style type="text/css">
        </style>
    </head>
    <body>

        <script src="../../static/js/highcharts/highcharts.js"></script>
        <script src="../../static/js/highcharts/modules/exporting.js"></script>
        <script src="../../static/js/highcharts/modules/export-data.js"></script>

        <div id="container" style="min-width: 310px; height: 400px; margin: 0 auto"></div>

        <script type="text/javascript">
// Line chart of the series passed in by the view; the Python lists are
// rendered directly as JavaScript array literals.
Highcharts.chart('container', {
    chart: {
        type: 'line'
    },
    title: {
        text: '云南省{{ citylistname }}疫情趋势图'
    },
    subtitle: {
        text: '返回首页:  <a href="/">老朽的首页</a>'
    },
    xAxis: {
        // List of strings, so it must bypass HTML escaping to keep its quotes.
        categories: {{ date_record|safe }}
    },
    yAxis: {
        title: {
            text: '人数'
        }
    },
    plotOptions: {
        line: {
            dataLabels: {
                enabled: true
            },
            enableMouseTracking: true
        }
    },
    series: [{
        name: '新增',
        data: {{ numd }}
    }, {
        name: '确诊',
        data: {{ numo }}
    }, {
        name: '死亡',
        data: {{ numdead }}
    }]
});
        </script>

        <!-- City selector: resubmits this page with ?citylistname=<city>. -->
        <ul align="center">
            <form action="../show00" method="get">
                <select name="citylistname">
                    <option value="全省">全省</option>
                    {% for c in city %}
                    {# Quote the value so names with spaces or special characters stay intact, and keep the current choice selected. #}
                    <option value="{{ c }}"{% if c == citylistname %} selected{% endif %}>{{ c }}</option>
                    {% endfor %}
                </select>
                <input type="submit" value="选择要显示数据的城市">
            </form>
        </ul>
        <a href="/">全省地图</a>

    </body>
</html>

success.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>添加成功</title>
</head>
<body>
    <!-- Status message supplied by the view under the "data" key. -->
    <ul align="center">{{ data }}!!</ul>
    <ul align="center"><a href="/">返回</a></ul>
</body>
</html>

yunnan.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title> 云南省</title>
</head>
<body>
<div id="map" style="width:800px;height: 500px;"></div>
<script src="../../static/js/highmaps/highmaps.js"></script>
<script src="../../static/js/highmaps/yunnan.js"></script>
<script>
// Placeholder values; each is overwritten below when the view supplies data.
var data1 = [{"name":"昆明","value":13},{"name":"曲靖","value":17},{"name":"玉溪","value":82},{"name":"保山","value":63},{"name":"昭通","value":90},{"name":"丽江","value":57},{"name":"普洱","value":32},{"name":"临沧","value":53},{"name":"楚雄彝族","value":47},{"name":"红河哈尼族彝族","value":95},{"name":"文山苗族","value":25},{"name":"西双版纳傣族","value":34},{"name":"大理白族","value":99},{"name":"德宏傣族景颇族","value":28},{"name":"怒江傈僳族","value":62},{"name":"迪庆","value":34}];

// Values passed in by the view, expected in the same order as data1.
// Missing (None) values are rendered as 0 so the array literal stays valid.
var values = [{% for item in data %}{{ item|default_if_none:0 }}{% if not forloop.last %}, {% endif %}{% endfor %}];

// Copy each supplied value onto the matching map entry. The previous version
// used "if (i=N)", an assignment rather than a comparison, which only worked
// by accident for exactly 16 items and never terminated for 1-15 items.
for (var i = 0; i < data1.length && i < values.length; i++) {
    data1[i].value = values[i];
}

// Initialise the map.
var map = new Highcharts.Map('map', {
  title: {
    text: '<a href="/">云南省</a>'
  },
  colorAxis: {
    min: 0,
    minColor: 'rgb(255,255,255)',
    maxColor: '#006cee'
  },
  series: [{
    data: data1,
    name: '病例数',
    mapData: Highcharts.maps['cn/yunnan'],
    joinBy: 'name' // join data points to map areas by the "name" property
  }]
});
</script>
</body></html>

三、epidemic应用代码

const.py
用来命名常量

# -*- coding: utf-8 -*-
# python 3.x
# Filename:const.py
# A small class that provides write-once "constants".
#
# __setattr__ consults the instance __dict__: assigning a name that is
# already stored there raises ConstError (a TypeError subclass); any other
# name is stored as a new constant.
# The statements after the class register an instance of it in the global
# sys.modules dictionary.
class _const:
    """Namespace whose attributes can be assigned once and never rebound."""

    class ConstError(TypeError):
        """Raised on an attempt to rebind an existing constant."""

    def __setattr__(self, name, value):
        """Store *value* under *name* unless *name* is already defined."""
        store = self.__dict__
        if name not in store:
            store[name] = value
            return
        raise self.ConstError("Can't rebind const (%s)" % name)

import sys
# Replace this module's entry in sys.modules with a _const instance, so that
# importing the module yields the instance and names assigned on it become
# write-once.
sys.modules[__name__] = _const()

constkey.py
用来存放selenium(这个怎么用,请百度吧,在此不赘述)驱动路径,云南州市名称等。
【注意】最新的代码我换了win10系统,python也由原来的3.8变成了3.9之后,原来所有的引用相同文件夹下其他python文件的情况都有所变化,比如import const ,需要变成from . import const,即都需要在前面加上“from . ”才行,不然就报错。

import const  

# Status labels that appear in the published bulletins.
const.STATUS_TYPE=['确诊病例','危重','重症','死亡','出院','疑似病例','医学观察']
# City/prefecture names in the order used when parsing bulletins.
const.CITYNAME=['昆明市','丽江市','西双版纳州','保山市','玉溪市','曲靖市','普洱市','德宏州','临沧市','昭通市','大理州','楚雄州','红河州','文山州','怒江州','迪庆州']
# The same names in a different order; the map view re-orders its data to
# match this list.
const.CITYMAP=['昆明市','曲靖市','玉溪市','保山市','昭通市','丽江市','普洱市','临沧市','楚雄州','红河州','文山州','西双版纳州','大理州', '德宏州', '怒江州','迪庆州']

# Romanised keys, index-aligned with CITYNAME.
# NOTE(review): 'quxiong' is presumably meant to be 'chuxiong' (楚雄); left
# unchanged because other code may key on the current spelling - confirm.
const.CITYCODE=['kunming','lijiang','xishuangbanna','baoshan','yuxi','qujing','puer','dehong','lincang','zhaotong','dali','quxiong','honghe','wenshan','nujiang','diqing']
const.URL = 'http://ynswsjkw.yn.gov.cn/wjwWebsite/web/col?id=UU157976428326282067&cn=xxgzbd&pcn=ztlm&pid=UU145102906505319731'  # Yunnan Provincial Health Commission
# Raw string: the Windows path contains backslash sequences (\P, \L, \s, \w,
# \c) that are invalid escapes in a normal literal. The value is unchanged.
const.DRIVERPATH = r'D:\Python38\Lib\site-packages\selenium\webdriver\chrome\chromedriver.exe'

urls.py

from django.contrib import admin
from django.urls import path,re_path
from django.conf import settings
from django.views.static import serve
from . import views

# Routes of the epidemic application; every entry maps straight to a
# function in this package's views module.
urlpatterns = [
   path('show/', views.situation_show),
   path('show00/', views.show00),
   path('deletedate/', views.deletedate),
   path('insert/', views.insertdata),
   path('insert00/', views.insertdata00),
   path('getdata/', views.getdata),
   path('getdata00/', views.getdata00),
   path('delet/', views.deleterecord),
   path('showcity/', views.showcity),
   path('insertvirus/', views.insertvirus),
   path('deletealldata/', views.deletealldata),
   path('insertviruslink/', views.insertviruslink),
   path('deletetable/', views.deletetable),
   path('checkdatabase/', views.checkdatabase),
   path('yunnan/', views.yunnan),

]

views.py
这里代码冗余较多,有些是先前试验时留下的,老旧函数可忽略不看。建议大家从index00.html中的链接入手,顺着调用关系梳理一遍,就明白了。注意,最后的地图显示在写本教程时还在试验,所以还有点问题。

import os
import datetime
import constkey
from . import insertvirusdata
import insertdata
from django.http import HttpResponse
from django.shortcuts import render
from epidemic.models import Situation,Total,Linktable
from django.shortcuts import render, redirect
from django.conf import settings
from mailmerge import MailMerge

def yunnan(request):
    """Render the province map with the latest confirmed count per city.

    ``Total`` rows are read in ``id`` order and treated as consecutive groups
    of ``len(CITYNAME)`` rows, one group per reporting day. Only the last
    complete group is used; its values are re-ordered to follow ``CITYMAP``
    and passed to ``yunnan.html`` under the ``data`` key.
    """
    request.encoding = 'utf-8'
    city_names = constkey.const.CITYNAME
    city_map = constkey.const.CITYMAP
    group_size = len(city_names)

    # A missing (NULL) count is treated as zero so the template always
    # receives numbers.
    counts = [row.number_ok or 0 for row in Total.objects.order_by("id")]
    complete_days = len(counts) // group_size
    if complete_days == 0:
        # No complete day stored yet: show zeros instead of failing while
        # indexing an empty list.
        data = [0] * len(city_map)
    else:
        start = (complete_days - 1) * group_size
        latest = counts[start:start + group_size]
        # The position of a city inside a day's group is the mirror of its
        # CITYNAME index (presumably because the importer stores each day's
        # rows in reverse order - confirm against the insert code).
        data = [latest[group_size - 1 - city_names.index(cm)] for cm in city_map]

    return render(request, 'yunnan.html', {'data': data})

def checkdatabase(request):
    """Show the ``linkvule`` of the link record with the highest id."""
    message = '检查数据'
    latest = Linktable.objects.order_by("id").last()
    if latest is None:
        # last() returns None on an empty table; report that rather than
        # failing on the attribute access below.
        return render(request, 'success.html', {'data': message + '失败:链接表为空'})
    return render(request, 'success.html', {'data': message + '成功' + str(latest.linkvule)})

def insertvirus(request):  # fetch the data scraped from the web and insert it
    """Delegate to ``insertvirusdata.insert`` and return its response."""
    return insertvirusdata.insert(request)

def insertviruslink(request):  # fetch the page links
    """Delegate to ``insertvirusdata.insertlink`` and return its response."""
    return insertvirusdata.insertlink(request)

def deletetable(request):
    """Render the ``deletetabledata.html`` template."""
    return render(request,'deletetabledata.html')

def deletealldata(request):  # delete all data
    """Delete every row of the table named by the ``table`` GET parameter.

    ``total`` clears ``Total`` and ``link`` clears ``Linktable``; any other
    value, or a missing parameter, deletes nothing. The outcome is shown via
    ``success.html``.

    NOTE(review): this destructive action is reachable through a plain GET
    request; consider requiring POST plus authentication.
    """
    request.encoding = 'utf-8'
    # Default to '' so a missing parameter takes the "nothing deleted" branch
    # instead of leaving the variable unbound.
    table = request.GET.get('table', '')

    if table == 'total':
        label = '病例'
        Total.objects.all().delete()
    elif table == 'link':
        label = '链接'
        Linktable.objects.all().delete()
    else:
        label = '没有'

    return render(request, 'success.html', {'data': label + '数据删除成功'})


def show00(request):   # display the data
    """Render the trend chart for the whole province or a single city.

    ``Total`` rows are read in ``id`` order and treated as consecutive groups
    of ``len(CITYNAME)`` rows, one group per reporting day. The confirmed
    series is the per-day sum of ``number_ok``, or one city's value when the
    ``citylistname`` GET parameter names a known city. The "new" series is
    the day-over-day difference of the confirmed series, with the first day
    compared against zero.
    """
    request.encoding = 'utf-8'
    city_names = constkey.const.CITYNAME
    group_size = len(city_names)

    day_dates = []     # date of the first row of every day (complete or not)
    day_rows = []      # one tuple of number_ok values per complete day
    current_day = []
    last_item = None   # passed on as 'number'; None when the table is empty
    for index, row in enumerate(Total.objects.order_by("id")):
        last_item = row
        if index % group_size == 0:
            day_dates.append(row.date_record)
        current_day.append(row.number_ok)
        if index % group_size == group_size - 1:
            last_item = tuple(current_day)
            day_rows.append(last_item)
            current_day = []

    confirmed = [sum(day) for day in day_rows]
    date_labels = [str(d.month) + '.' + str(d.day) for d in day_dates]

    ci = request.GET.get('citylistname', '')
    if ci:
        if ci != '全省':
            if ci in city_names:
                # A city's position inside a day is the mirror of its
                # CITYNAME index.
                position = group_size - 1 - city_names.index(ci)
                confirmed = [day[position] for day in day_rows]
            else:
                # Unknown city: fall back to the province-wide series instead
                # of failing on the index lookup.
                ci = ''
        print('显示' + ci + '数据')

    # Day-over-day increase; the first day is compared against zero.
    new_cases = [cur - prev for prev, cur in zip([0] + confirmed[:-1], confirmed)]

    day_count = len(day_rows)
    context = {
        'citylistname': ci,
        'city': city_names,
        'number': last_item,
        "numd": new_cases,
        'numo': confirmed,
        # These three series are not collected yet: one zero per complete day.
        'numt': [0] * day_count,
        'numdead': [0] * day_count,
        'numre': [0] * day_count,
        'date_record': date_labels,
    }
    return render(request, 'show00.html', context)




####以下是老旧的函数



def getcitylist(code, name):
    """Pair every entry of *code* with the entry of *name* at the same index.

    Returns a list of ``(code_item, name_item)`` tuples with one tuple per
    entry of *code*. Extra entries in *name* are ignored; a *name* shorter
    than *code* raises ``IndexError``.
    """
    return [(item, name[index]) for index, item in enumerate(code)]

def getdata(request):
    """Render the ``getdata.html`` template."""
    return render(request,'getdata.html')

def getdata00(request):
    """Render the ``getdata00.html`` template."""
    return render(request,'getdata00.html')

def deletedate(request):
    """Render the ``deletedate.html`` template."""
    return render(request,'deletedate.html')

def insertdata(request):
    """Store one ``Situation`` row built from GET parameters.

    Required parameters: ``name``, ``numd``, ``numo``, ``numt``, ``bday`` and
    ``btime``. When any of them is missing or empty a 400 response naming the
    missing keys is returned and nothing is saved.
    """
    # Local import: escape() is only needed for the confirmation message.
    from django.utils.html import escape

    content = "这里是插入数据"
    request.encoding = 'utf-8'

    required = ('name', 'numd', 'numo', 'numt', 'bday', 'btime')
    missing = [key for key in required if not request.GET.get(key)]
    if missing:
        # Previously the locals below stayed unbound and the view crashed.
        return HttpResponse(content + " 缺少参数: " + ", ".join(missing), status=400)

    pname = request.GET['name']
    numberd = request.GET['numd']
    numbero = request.GET['numo']
    numbert = request.GET['numt']
    etime = request.GET['bday'] + ' ' + request.GET['btime']

    record = Situation(province=pname, number_doubt=numberd, number_ok=numbero,
                       number_total=numbert, time=etime)
    record.save()
    # pname is free text echoed into an HTML response, so it is escaped; the
    # other values have already been accepted by the numeric/datetime fields.
    return HttpResponse(content+" " + escape(pname)+" " + numberd + " " + numbero+ " " +etime+ "成功")

def insertdata00(request):
    """Store one summary ``Total`` row built from GET parameters.

    Required parameters: ``numd``, ``numo``, ``numdead``, ``numre`` and
    ``bday``. ``number_total`` is stored as ``int(numd) + int(numo)``. A 400
    response is returned when a parameter is missing or empty, or when
    ``numd``/``numo`` are not integers.
    """
    content = "这里是插入汇总数据"
    request.encoding = 'utf-8'

    required = ('numd', 'numo', 'numdead', 'numre', 'bday')
    missing = [key for key in required if not request.GET.get(key)]
    if missing:
        # Previously the locals below stayed unbound and the view crashed.
        return HttpResponse(content + " 缺少参数: " + ", ".join(missing), status=400)

    numberd = request.GET['numd']
    numbero = request.GET['numo']
    numberdead = request.GET['numdead']
    numberre = request.GET['numre']
    pday = request.GET['bday']

    try:
        number_total = int(numberd) + int(numbero)
    except ValueError:
        return HttpResponse(content + " 参数格式错误: numd 和 numo 必须是整数", status=400)

    record = Total(number_doubt=numberd, number_ok=numbero, number_total=number_total,
                   number_dead=numberdead, number_recure=numberre, date_record=pday)
    record.save()
    return HttpResponse(content+" 疑似: " + numberd+" 确诊: " + numbero+" 死亡: "+numberdead+" 治愈: " +numberre+"日期: " +pday+ "  成功")


def deleterecord(request):
    """Delete every ``Total`` row whose ``date_record`` equals ``bday``.

    ``bday`` is read from the GET parameters; a 400 response is returned when
    it is missing or empty.
    """
    request.encoding = 'utf-8'
    pday = request.GET.get('bday')
    if not pday:
        # Previously pday stayed unbound here and the filter below crashed.
        return HttpResponse("<p>缺少参数: bday</p>", status=400)

    Total.objects.filter(date_record=pday).delete()

    return HttpResponse("<p>删除成功</p>")

def situation_show(request):    # display the data as a chart
    """Render ``show.html`` with one chart point per ``Situation`` row.

    Context keys: ``X`` holds the province names, ``age``, ``height`` and
    ``weight`` hold ``number_doubt``, ``number_ok`` and ``number_total``
    (the key names are legacy), and ``users`` holds the last row iterated,
    or ``None`` when the table is empty.
    """
    listx = []
    listage = []
    listheight = []
    listweight = []
    last = None  # stays None on an empty table instead of being unbound
    for s in Situation.objects.all():  # build the x and y series
        last = s
        listx.append(str(s.province))
        # The columns are nullable; count a missing value as zero.
        listage.append(int(s.number_doubt or 0))
        listheight.append(int(s.number_ok or 0))
        listweight.append(int(s.number_total or 0))
    return render(request, "show.html", {'users': last, 'X': listx, 'age': listage,
                                         'height': listheight, 'weight': listweight})

def showcity(request):
    """Render ``show00.html`` with one chart point per ``Total`` row.

    The optional ``city`` GET parameter is passed through to the template
    only; it does not filter the rows.
    """
    # Default to '' so a missing parameter no longer leaves the name unbound.
    cityname = request.GET.get('city', '')
    listnumd = []
    listnumo = []
    listnumt = []
    listnumdead = []
    listnumre = []
    listdate = []
    last = None  # passed on as 'number'; None when the table is empty
    for t in Total.objects.all():
        last = t
        listnumd.append(t.number_doubt)
        listnumo.append(t.number_ok)
        listnumt.append(t.number_total)
        listnumdead.append(t.number_dead)
        listnumre.append(t.number_recure)
        listdate.append(str(t.date_record.month) + '.' + str(t.date_record.day))
    context = {
        'cityname': cityname,
        # show00.html reads the city under 'citylistname'; supply both keys.
        'citylistname': cityname,
        'city': constkey.const.CITYNAME,
        'number': last,
        "numd": listnumd,
        'numo': listnumo,
        'numt': listnumt,
        'numdead': listnumdead,
        'numre': listnumre,
        'date_record': listdate,
    }
    return render(request, 'show00.html', context)

models.py
【注意】如果要用models.py在MYSQL中生成数据表,需要把 class Meta:中的“ managed = False” 变成“ managed = True”;如果保持为False,Django不会在数据库中生成对应的表。
如果生成表格出错,按以下步骤操作:
1.在MYSQL中删除所有表;
2.删除epidemic应用下migrations文件夹下除__init__.py外所有文件
3.python manage.py makemigrations epidemic
4.python manage.py migrate

# This is an auto-generated Django model module.
# You'll have to do the following manually to clean this up:
#   * Rearrange models' order
#   * Make sure each model has one field with primary_key=True
#   * Make sure each ForeignKey and OneToOneField has `on_delete` set to the desired behavior
#   * Remove `managed = False` lines if you wish to allow Django to create, modify, and delete the table
# Feel free to rename the models, but don't rename db_table values or field names.
from django.db import models


class AuthGroup(models.Model):
    """Unmanaged mapping of the existing ``auth_group`` table."""
    name = models.CharField(unique=True, max_length=150)

    class Meta:
        managed = False
        db_table = 'auth_group'


class AuthGroupPermissions(models.Model):
    """Unmanaged mapping of ``auth_group_permissions`` (group/permission pairs)."""
    group = models.ForeignKey(AuthGroup, models.DO_NOTHING)
    permission = models.ForeignKey('AuthPermission', models.DO_NOTHING)

    class Meta:
        managed = False
        db_table = 'auth_group_permissions'
        unique_together = (('group', 'permission'),)


class AuthPermission(models.Model):
    """Unmanaged mapping of the existing ``auth_permission`` table."""
    name = models.CharField(max_length=255)
    content_type = models.ForeignKey('DjangoContentType', models.DO_NOTHING)
    codename = models.CharField(max_length=100)

    class Meta:
        managed = False
        db_table = 'auth_permission'
        unique_together = (('content_type', 'codename'),)


class AuthUser(models.Model):
    """Unmanaged mapping of the existing ``auth_user`` table."""
    password = models.CharField(max_length=128)
    last_login = models.DateTimeField(blank=True, null=True)
    is_superuser = models.IntegerField()
    username = models.CharField(unique=True, max_length=150)
    first_name = models.CharField(max_length=30)
    last_name = models.CharField(max_length=150)
    email = models.CharField(max_length=254)
    is_staff = models.IntegerField()
    is_active = models.IntegerField()
    date_joined = models.DateTimeField()

    class Meta:
        managed = False
        db_table = 'auth_user'


class AuthUserGroups(models.Model):
    """Unmanaged mapping of ``auth_user_groups`` (user/group pairs)."""
    user = models.ForeignKey(AuthUser, models.DO_NOTHING)
    group = models.ForeignKey(AuthGroup, models.DO_NOTHING)

    class Meta:
        managed = False
        db_table = 'auth_user_groups'
        unique_together = (('user', 'group'),)


class AuthUserUserPermissions(models.Model):
    """Unmanaged mapping of ``auth_user_user_permissions`` (user/permission pairs)."""
    user = models.ForeignKey(AuthUser, models.DO_NOTHING)
    permission = models.ForeignKey(AuthPermission, models.DO_NOTHING)

    class Meta:
        managed = False
        db_table = 'auth_user_user_permissions'
        unique_together = (('user', 'permission'),)


class DjangoAdminLog(models.Model):
    """Unmanaged mapping of the existing ``django_admin_log`` table."""
    action_time = models.DateTimeField()
    object_id = models.TextField(blank=True, null=True)
    object_repr = models.CharField(max_length=200)
    action_flag = models.PositiveSmallIntegerField()
    change_message = models.TextField()
    content_type = models.ForeignKey('DjangoContentType', models.DO_NOTHING, blank=True, null=True)
    user = models.ForeignKey(AuthUser, models.DO_NOTHING)

    class Meta:
        managed = False
        db_table = 'django_admin_log'


class DjangoContentType(models.Model):
    """Unmanaged mapping of the existing ``django_content_type`` table."""
    app_label = models.CharField(max_length=100)
    model = models.CharField(max_length=100)

    class Meta:
        managed = False
        db_table = 'django_content_type'
        unique_together = (('app_label', 'model'),)


class DjangoMigrations(models.Model):
    """Unmanaged mapping of the existing ``django_migrations`` table."""
    app = models.CharField(max_length=255)
    name = models.CharField(max_length=255)
    applied = models.DateTimeField()

    class Meta:
        managed = False
        db_table = 'django_migrations'


class DjangoSession(models.Model):
    """Unmanaged mapping of ``django_session``; ``session_key`` is the primary key."""
    session_key = models.CharField(primary_key=True, max_length=40)
    session_data = models.TextField()
    expire_date = models.DateTimeField()

    class Meta:
        managed = False
        db_table = 'django_session'


class Linktable(models.Model):
    """Unmanaged mapping of the ``linktable`` table: one row per bulletin link."""
    # Title text of the link.
    linktitle = models.CharField(max_length=100, blank=True, null=True)
    # The link URL itself (the column name is presumably a misspelling of
    # "link value").
    linkvule = models.CharField(max_length=100, blank=True, null=True)
    # Date attached to the link.
    linkdate = models.DateField(blank=True, null=True)

    class Meta:
        managed = False
        db_table = 'linktable'


class Situation(models.Model):
    """Unmanaged mapping of the ``situation`` table: counts for one province at one time."""
    province = models.CharField(max_length=45, blank=True, null=True)
    number_doubt = models.IntegerField(blank=True, null=True)
    number_ok = models.IntegerField(blank=True, null=True)
    number_total = models.IntegerField(blank=True, null=True)
    time = models.DateTimeField(blank=True, null=True)

    class Meta:
        managed = False
        db_table = 'situation'


class Total(models.Model):
    """Unmanaged mapping of the ``total`` table: case counts for one city on one date."""
    city = models.CharField(max_length=45, blank=True, null=True)
    # Suspected cases.
    number_doubt = models.IntegerField(blank=True, null=True)
    # Confirmed cases.
    number_ok = models.IntegerField(blank=True, null=True)
    number_total = models.IntegerField(blank=True, null=True)
    # Deaths.
    number_dead = models.IntegerField(blank=True, null=True)
    # Recovered cases.
    number_recure = models.IntegerField(blank=True, null=True)
    date_record = models.DateField(blank=True, null=True)
    totalcol = models.CharField(max_length=45, blank=True, null=True)

    class Meta:
        managed = False
        db_table = 'total'

insertvirusdata.py

import constkey
import getText
import getdynamicdata
import getvirusdynamic
from django.shortcuts import render
#from epidemic.models import DatabasePicupload
from datetime import datetime
from epidemic.models import Total,Linktable

def insertlink(request):
    """Save every link returned by ``getvirusdynamic.getdatalist``.

    The returned list is reversed in place before saving. Each entry is
    indexed as ``(date, title, link)`` and stored as one ``Linktable`` row.
    """
    entries = getvirusdynamic.getdatalist()
    entries.reverse()
    for entry in entries:
        Linktable(linktitle=entry[1], linkvule=entry[2], linkdate=entry[0]).save()
    return render(request, 'success.html', {'data': '链接数据添加成功'})

def insert(request):
    """Store the scraped per-city case counts in ``Total``.

    Records whose date equals the date of the newest stored row update that
    day's existing row for the same city; all other records are inserted as
    new rows with only ``number_ok`` and ``number_total`` populated.
    """
    print('写入数据库')
    rold = Total.objects.order_by('id').last()  # newest stored city record
    # last() returns None on an empty table; then no date can match and every
    # record is inserted.
    last_date = None if rold is None else str(rold.date_record)

    scraped = getdynamicdata.getvirusdata()
    scraped.reverse()
    for cityname, numbero, day in readdata(scraped):
        if day == last_date:
            Total.objects.filter(date_record=rold.date_record, city=cityname).update(number_ok=numbero)
            continue
        pday = datetime.strptime(day, '%Y-%m-%d')
        # Suspected, dead and recovered counts are not scraped: stored as 0.
        record = Total(city=cityname, number_doubt=0, number_ok=numbero,
                       number_total=int(numbero), number_dead=0,
                       number_recure=0, date_record=pday)
        record.save()
    return render(request, 'success.html', {'data': '数据添加成功'})

def readdata(list=None):
    """Turn a flat list of date markers and city lines into records.

    An entry with text between ``[`` and ``]`` sets the current date. Every
    other entry yields one ``(city, count_text, date)`` tuple per ``CITYNAME``
    entry it contains, where ``count_text`` is the text between the city name
    and ``例``. Entries that appear before the first date marker are skipped.

    ``list`` keeps its original name for keyword callers; when omitted, the
    sample data the function has always defaulted to is used.
    """
    if list is None:
        # Built per call instead of sharing one mutable default object.
        list = ['[2020-01-24]','昆明市1例', '丽江市0例', '[2020-01-23]', '昆明市0例', '丽江市0例' ]

    result = []
    virusdate = None
    for entry in list:
        found_date = getText.get_str_btw(entry, '[', ']')
        if len(found_date) > 0:
            virusdate = found_date
            continue
        if virusdate is None:
            # No date seen yet: the record cannot be dated, so drop it rather
            # than fail on an unbound name.
            continue
        for city in constkey.const.CITYNAME:
            if city in entry:
                result.append((city, getText.get_str_btw(entry, city, '例'), virusdate))
    return result

def insert_test(request):
    """Print the scraped list and its parsed form without saving anything.

    The scraped list is reversed in place and then passed to ``test.html``
    under the ``data`` key.
    """
    scraped = getdynamicdata.getvirusdata()
    scraped.reverse()
    print(scraped)
    parsed = readdata(scraped)
    print(parsed)
    return render(request, 'test.html', {'data': scraped})

getvirusdynamic.py

# -*- coding: UTF-8 -*-
# 获取卫计委的列表链接
# 获取页面信息

# 输入:默认是 云南省卫计委网站疫情列表页
# 处理:获取所有列表的标题,链接,日期
# 输出:获取日期,标题,链接并添加到列表中


from selenium import webdriver
from bs4 import BeautifulSoup
import time
import getText
import constkey
from urllib.parse import urlparse
from urllib import parse
from epidemic.models import Situation,Total,Linktable


#browser = webdriver.Chrome('C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')#指定驱动路径
#browser = webdriver.Chrome(constkey.const.DRIVERPATH)#指定驱动路径
#browser = webdriver.Chrome()#PATH中配置环境变量不行,不知道为什么
#url = constkey.const.URL


#sonurl='/wjwWebsite/web/doc/UU158056990873682575'

def checkdata():
    """Return the ``linkvule`` of the link record with the highest id.

    Returns an empty string when the table has no rows, so comparing the
    result with a real URL is simply false.
    """
    latest = Linktable.objects.order_by("id").last()
    if latest is None:
        return ''
    return str(latest.linkvule)


def geturl(father_url, son_url):
    """Resolve *son_url* against *father_url* and return the absolute URL."""
    return parse.urljoin(father_url, son_url)

def getdomain(url):
    """Return the ``scheme://netloc`` prefix of *url*."""
    parts = urlparse(url)
    return '{}://{}'.format(parts.scheme, parts.netloc)

def getdatalist():     # public entry point
    """Collect ``(date, title, link)`` tuples from the bulletin list pages.

    Opens ``const.URL`` in Chrome and reads up to nine list pages. Collection
    stops at the first item whose absolute link equals the most recently
    stored link, or when no link to the next page number can be found.
    """
    # Local import keeps the module's import block untouched.
    from selenium.common.exceptions import NoSuchElementException

    url = constkey.const.URL  # default: the health commission's list page
    fatherurl = getdomain(url)
    # Nothing is written to the database while scraping, so the last stored
    # link is looked up once instead of once per list item.
    last_seen = checkdata()
    linklist = []
    browser = webdriver.Chrome(constkey.const.DRIVERPATH)  # explicit driver path
    try:
        browser.get(url)
        for i in range(1, 10):
            soup = BeautifulSoup(browser.page_source, 'lxml')
            all_news = soup.find('div', class_='theSimilar').find_all('li')
            stop = False
            for news in all_news:
                anchor = news.find('a')
                title = anchor.get_text()
                link = geturl(fatherurl, anchor.get("href"))
                if link == last_seen:  # this link has already been stored
                    stop = True
                    break
                # The span holds the plain date text (e.g. 2022-12-03).
                linklist.append((news.find('span').get_text(), title, link))
            if stop:
                break
            # Move on to the next page via its page-number link.
            key = "//*[@id='nav-page-ul']/li/a[text()='"+str(i+1)+"']"
            try:
                next_page = browser.find_element_by_xpath(key)
            except NoSuchElementException:
                # No further page: finished.
                break
            next_page.click()
            time.sleep(1)
    finally:
        # Always shut the driver down, even when scraping raised.
        browser.quit()
    return linklist


getdynamicdata.py

# -*- coding: UTF-8 -*-
# 获取卫计委的列表链接
# 获取页面信息

# 输入:从数据库中读取链接
# 处理:获取各个城市病例数
# 输出:添加列表 日期,城市病例数
import datetime
import const
import requests
from bs4 import BeautifulSoup
import re
import getvirusdynamic
import getText
from epidemic.models import Situation,Total,Linktable

# Status labels as they appear in the bulletin text (confirmed, critical,
# severe, deaths, discharged, suspected, under medical observation).
const.status_type=['确诊病例','危重','重症','死亡','出院','疑似病例','医学观察']
# The 16 city/prefecture names searched for in each bulletin; used as the
# default ``citylist`` argument by the parsing functions below.
const.citylist=['昆明市','丽江市','西双版纳州','保山市','玉溪市','曲靖市','普洱市','德宏州','临沧市','昭通市','大理州','楚雄州','红河州','文山州','怒江州','迪庆州']
# Romanised keys that appear to be positionally aligned with const.citylist.
# NOTE(review): 'quxiong' sits at the position of '楚雄州' and looks like a typo
# for 'chuxiong'; other modules may depend on this exact key, so confirm
# before changing it.
const.citykey=['kunming','lijiang','xishuangbanna','baoshan','yuxi','qujing','puer','dehong','lincang','zhaotong','dali','quxiong','honghe','wenshan','nujiang','diqing']

def getHTMLText(url):
    """Download ``url`` and return the response body decoded as UTF-8.

    Returns:
        The page text, or the sentinel string ``" error "`` when the request
        fails (connection problem, timeout, invalid URL, or an HTTP 4xx/5xx
        status reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=300)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        r.encoding = 'utf-8'  # force UTF-8 decoding of the body
        return r.text
    except requests.RequestException:
        # Only request failures are turned into the sentinel; anything else
        # (e.g. KeyboardInterrupt) is no longer swallowed.
        return " error "

def findHTMLText(text):
    """Return the text nodes of ``text`` that look like bulletin data.

    A node is kept when it is a 1-4 digit number, starts with a CJK
    character in the range U+4E00..U+9FA5, or contains one of the marker
    words 市 / 出院 / 例 / 人 / 年 / 月 / 日 / 时.

    Returns:
        The result of ``BeautifulSoup.find_all(string=...)``.
    """
    soup = BeautifulSoup(text, "html.parser")
    # Raw string: ``\d`` in a plain literal is an invalid escape sequence.
    # The ``re`` module itself understands ``\uXXXX``, so the pattern matches
    # exactly the same strings as before.
    pattern = re.compile(
        r'^\d{1,4}$|^[\u4e00-\u9fa5]|(市)|(出院)|(例)|(人)|(年)|(月)|(日)|(时)'
    )
    return soup.find_all(string=pattern)

def getCONTENT(url):
    """Fetch ``url`` and return its matching text fragments, whitespace-free.

    The page is downloaded with ``getHTMLText``, stripped of CR/LF/TAB, and
    filtered with ``findHTMLText``. Each match is then replaced in place by
    a plain string with non-breaking spaces and all whitespace removed.
    """
    html = re.sub('\r|\n|\t', '', getHTMLText(url))
    matches = findHTMLText(html)
    for idx, fragment in enumerate(matches):
        without_nbsp = re.sub('\xa0', '', fragment)
        matches[idx] = "".join(without_nbsp.split())
    return matches

def yunnanold(list,data=None,citylist=const.citylist):
    """Extract per-key counts from an old-format bulletin (numbers inline).

    The longest string in ``list`` is taken as the bulletin body. For every
    key in ``citylist`` one record is appended to ``data``:

    * key absent from the body          -> ``key + '0例'``
    * key is a substring of '无死亡病例' and the body contains that phrase
                                        -> ``'无死亡病例'``
    * key == '医学观察'                  -> ``key + <text up to '人'> + '人'``
    * otherwise                         -> ``key + <text up to '例'> + '例'``

    Args:
        list: text fragments of one bulletin (must not be empty).
        data: list to append to; a fresh ``['云南省']`` is created when
            omitted, so separate calls no longer share one default list.
        citylist: keys to look up.

    Returns:
        ``data`` with one record appended per key.
    """
    if data is None:
        data = ['云南省']
    text = max(list, key=len)  # the longest fragment holds the statistics
    print(text)
    for k in citylist:
        if k not in text:
            data.append(k + '0' + '例')
            continue
        if k in '无死亡病例' and '无死亡病例' in text:
            data.append('无死亡病例')
            continue
        # Medical observation is counted in persons, everything else in cases.
        unit = '人' if k == '医学观察' else '例'
        num = getText.get_str_btw(text, k, unit)
        data.append(k + num + unit)
    return data

def cleandataold(city,list,data=None):
    """Pick the case count for ``city`` out of a list of text fragments.

    Fragments are scanned in order. A fragment containing '新增' switches to
    "newly added" mode and one containing '确诊' switches back; mentions of
    ``city`` seen in "newly added" mode are ignored. After an accepted
    mention, the next all-digit fragment is recorded as
    ``city + digits + '例'``.

    Args:
        city: name to look for.
        list: text fragments of one bulletin.
        data: list to append to; a fresh ``['云南省']`` is created when
            omitted, so separate calls no longer share one default list.

    Returns:
        ``data``, possibly with one or more records appended.
    """
    if data is None:
        data = ['云南省']
    new = False    # currently inside a "newly added" passage
    yfind = False  # an accepted mention of the city awaits its number
    place = ''

    for l in list:
        if '新增' in l:
            new = True
        if '确诊' in l:
            new = False
        if city in '无死亡' and '无死亡' in l:
            data.append('无死亡病例')
            break
        if city in l:
            if new:
                continue  # skip mentions that belong to the new-case figures
            place = city
            yfind = True
        if l.isdigit() and yfind:
            data.append(place + l + '例')
            yfind = False
            if not new:
                return data
    return data

def _first_city_number(text, citykey):
    """Find the first ``citykey ... 例`` span in ``text`` whose content is numeric.

    Returns ``(number_string, text_after_that_citykey)``, or ``(None, '')``
    when a blank span is reached first (which includes ``citykey`` no longer
    occurring in ``text``).
    """
    while True:
        candidate = getText.get_str_btw(text, citykey, '例')
        if getText.is_number(candidate):
            return candidate, text.partition(citykey)[2]
        if candidate.strip() == '':
            return None, ''
        # Not a number: move past this mention and try the next one.
        text = text.partition(citykey)[2]


def cleandata(city,list,data=None):
    """Append the case count of ``city`` found in ``list`` to ``data``.

    All fragments are concatenated, then the first two numeric
    ``city<number>例`` mentions are located. The larger of the two (the
    second on a tie) is recorded as ``city + number + '例'``; when the city
    has no numeric mention the record is ``city + '0例'``.

    Args:
        city: name to look for.
        list: text fragments of one bulletin.
        data: list to append to; a fresh ``['云南省']`` is created when
            omitted, so separate calls no longer share one default list.

    Returns:
        ``data`` with exactly one record appended.

    NOTE(review): ``getText.is_number`` also accepts strings such as '1.5' or
    '一', for which ``int()`` below raises ValueError; that case is not handled
    here, as before.
    """
    if data is None:
        data = ['云南省']
    text = ''.join(list)

    num1, remainder = _first_city_number(text, city)
    if num1 is None:
        num = 0
    else:
        num2, _ = _first_city_number(remainder, city)
        if num2 is None:
            num2 = 0
        num = num1 if int(num1) > int(num2) else num2
    data.append(city + str(num) + '例')

    return data

def yunnan(list,data,citylist=const.citylist):
    """Append one ``cleandata`` record per city in ``citylist`` to ``data``.

    Args:
        list: text fragments of one bulletin.
        data: list that receives the records.
        citylist: city names to look up.

    Returns:
        The list returned by the last ``cleandata`` call, or ``data`` itself
        when ``citylist`` is empty (previously an UnboundLocalError).
    """
    print('getdynamicdata.yunnan()')
    result = data
    for city in citylist:
        result = cleandata(city, list, data)
    return result

def getlist(urllist, citylist=const.citylist,mutilist=None):
    """Download every epidemic bulletin in ``urllist`` and collect city counts.

    Args:
        urllist: iterable of ``(date, title, link)`` tuples.
        citylist: city names to extract from each bulletin.
        mutilist: list to append to; a fresh ``['云南省']`` is created when
            omitted, so separate calls no longer share one default list.

    Returns:
        ``mutilist`` extended, for each bulletin, with one record per city
        followed by that bulletin's date entry (``u[0]``).
    """
    if mutilist is None:
        mutilist = ['云南省']
    for u in urllist:
        if '肺炎疫情情况' not in u[1]:  # keep only epidemic-situation bulletins
            continue
        res = getCONTENT(u[2])
        # Every bulletin goes through yunnan(); the date-based switch to the
        # legacy yunnanold() parser is no longer used.
        mutilist = yunnan(res, mutilist, citylist)
        mutilist.append(u[0])  # date marker that closes this bulletin's records
    return mutilist

def getoldlinkdata():
    """Print and return ``date_record`` of the ``Total`` row with the highest id.

    NOTE(review): ``last()`` yields None on an empty table, in which case the
    attribute access below raises AttributeError.
    """
    latest = Total.objects.order_by('id').last()
    last_date = latest.date_record
    print(last_date)
    return last_date


def geturllistfromdatabase():
    """Build the bulletin link list from the ``Linktable`` table.

    Rows whose ``linkdate`` is on or after the date returned by
    ``getoldlinkdata()`` are read newest-id first. Rows whose title lacks
    '肺炎疫情情况' are skipped, as is any row whose ``linkdate`` equals that of
    the previously accepted row.

    Returns:
        A list of ``('[<linkdate>]', linktitle, linkvule)`` tuples.
    """
    print('从数据库读取链接列表')
    urllist = []
    dayd = getoldlinkdata()
    # Only rows dated on/after the last recorded total, newest id first.
    links = Linktable.objects.filter(linkdate__gte=dayd).order_by("-id")

    # Date of the previously accepted row; seeded with a fixed start value.
    d = datetime.datetime.strptime('2020-1-20', '%Y-%m-%d')
    for l in links:
        if '肺炎疫情情况' not in l.linktitle:
            continue
        if d == l.linkdate:  # same date as the previous accepted row
            continue
        d = l.linkdate
        urllist.append(('[' + str(l.linkdate) + ']', l.linktitle, l.linkvule))
    print('打印geturllistfromdatabase()')
    print(urllist)
    return urllist

def getvirusdata():
    """Entry point: parse every bulletin link stored in the database.

    Reads the link list with ``geturllistfromdatabase()`` and feeds it to
    ``getlist`` with the default city list and a fresh ``['云南省']`` result
    list.
    """
    links = geturllistfromdatabase()
    return getlist(links, citylist=const.citylist, mutilist=['云南省'])  # Yunnan by default



def getvirusdata00():
    """Parse one hard-coded bulletin page with the old-format parser.

    The original note describes this as a single page from before 31 January.
    The result of ``yunnanold`` gets the fixed marker '[2020-2-7]' appended.
    """
    page_url = 'http://ynswsjkw.yn.gov.cn/wjwWebsite/web/doc/UU158004254342965198'
    fragments = getCONTENT(page_url)
    records = yunnanold(fragments, data=['云南省'], citylist=const.citylist)
    records.append('[2020-2-7]')
    return records

def getvirusdata11():
    """Parse one hard-coded bulletin page with the current parser.

    The original note describes this as a single page from after 31 January.
    The result of ``yunnan`` gets the fixed marker '[2020-2-6]' appended.
    """
    page_url = 'http://ynswsjkw.yn.gov.cn/wjwWebsite/web/doc/UU158101410083798470'
    fragments = getCONTENT(page_url)
    records = yunnan(fragments, data=['云南省'], citylist=const.citylist)
    records.append('[2020-2-6]')
    return records



getText.py

#用来从字符串中获取指定内容
#text 为待查找的字符串
#f、b 为限定范围的前、后约束字符串

def get_int_after(text, f):
    """Return the integer that follows the first occurrence of ``f`` in ``text``.

    Matching of ``f`` is case-insensitive. Any ':', '=' or ' ' characters
    directly after ``f`` are skipped; then an optional leading '-' and the
    following run of ASCII digits are read. A '-' after the first digit ends
    the number, so "12-34" yields 12 instead of failing to parse.

    Returns:
        The parsed ``int``, or the string ``"Parsing error"`` when no integer
        is found (a diagnostic is printed in that case).
    """
    tail = text.upper().partition(f.upper())[2]
    int_str = ""
    for c in tail:
        if c in "0123456789" or (c == "-" and int_str == ""):
            int_str += c
        elif c in ":= " and int_str == "":
            continue  # separator between the key and its value
        else:
            break
    try:
        return int(int_str)
    except ValueError:
        print("Get Int After Fail")
        print(f, text)
        return "Parsing error"

def get_hex_after(text, f):
    """Return the hexadecimal number that follows ``f`` in ``text`` as an int.

    Hex digits are accepted in either case. The characters ':', '=', ' ' and
    'x' are skipped for as long as the accumulated value is still 0; any
    other character ends the number.

    Returns:
        The parsed value, or 0 when no hex digit is found.
    """
    value = 0
    for c in text.partition(f)[2]:
        # Membership test instead of ord(c.capitalize()): capitalising some
        # characters (e.g. 'ß') yields two characters, which made ord() raise
        # TypeError.
        if c in "0123456789abcdefABCDEF":
            value = value * 16 + int(c, 16)
        elif c in ":= x" and value == 0:
            continue
        else:
            break
    return value

def get_str_btw(text, f, b):
    """Return the part of ``text`` after the first ``f`` and before the next ``b``.

    Yields '' when ``f`` does not occur, and everything after ``f`` when
    ``b`` does not occur in the remainder.
    """
    _, _, after_f = text.partition(f)
    between, _, _ = after_f.partition(b)
    return between[:]


def is_number(s):
    """Report whether ``s`` represents a number.

    True when ``float(s)`` succeeds, or when ``s`` is a single character with
    a Unicode numeric value (e.g. '五'); False otherwise.
    """
    import unicodedata

    try:
        float(s)
    except ValueError:
        # Not a float literal: fall back to single-character Unicode numerics.
        try:
            unicodedata.numeric(s)
        except (TypeError, ValueError):
            return False
    return True

主要用到的代码就是这些了。各模块之间的具体关系,准备画个思维导图,画好后再传上来。

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值