# Crawl information about dishonest judgment debtors ("失信人") and store it in MySQL.
import requests as r
import MySQLdb
# Open the MySQL connection and its cursor once, at import time. Both are
# module-level globals that TrustBreaker.get_breaker writes through and closes.
conn = MySQLdb.connect(
    host="****", port=3306,
    user="root", password="****",
    db="spider", charset="utf8",
)
cursor = conn.cursor()
class TrustBreaker:
    """Crawler for Baidu's dishonest-judgment-debtor ("失信人") listing.

    Fields copied from each API record (meaning -> JSON key):

    * person name -> iname
    * ID card number -> cardNum
    * enforcing court -> gistUnit
    * province -> areaNameNew
    * case number -> gistId
    * obligation set by the effective legal document -> duty
    * how far the debtor has performed it -> performance
    * specific circumstances of the dishonest conduct -> disruptTypeName
    * publication date -> publishDate
    """

    _URL = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?"

    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block the crawl forever.
    _TIMEOUT = 10

    # JSON keys read from each record, in the same order as the INSERT columns.
    _FIELDS = (
        "iname",
        "cardNum",
        "gistUnit",
        "areaNameNew",
        "gistId",
        "duty",
        "performance",
        "disruptTypeName",
        "publishDate",
    )

    _INSERT_SQL = (
        "insert into breaker_spider("
        "iname,cardnum,gistunit,areanamenew,gistid,"
        "duty,performance,disrupttypename,publishdate) "
        "values(%s,%s,%s,%s,%s,%s,%s,%s,%s) "
    )

    @classmethod
    def get_breaker(cls, pages=50, page_size=10):
        """Fetch listing pages and insert every record into ``breaker_spider``.

        Each record's name is printed as it is read. Rows are inserted and
        committed one page at a time through the module-level ``conn`` and
        ``cursor``. Both are closed when this method returns *or* raises, so
        it can be run only once per process.

        Args:
            pages: Number of result pages to request (default 50).
            page_size: Number of records requested per page (default 10).

        Raises:
            requests.RequestException: On a timeout, connection failure or
                non-2xx HTTP status.
            KeyError: If a record lacks one of the expected fields.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.17 Safari/537.36'
        }
        try:
            for page_index in range(pages):
                params = {
                    'resource_id': 6899,
                    'query': '失信人',
                    # 'pn' is the record offset of the page, 'rn' its size.
                    'pn': str(page_index * page_size),
                    'rn': str(page_size),
                    'ie': 'utf-8',
                    'oe': 'utf-8',
                }
                response = r.request(
                    method="GET",
                    url=cls._URL,
                    headers=headers,
                    params=params,
                    timeout=cls._TIMEOUT,
                )
                # Fail loudly on an HTTP error instead of trying to parse an
                # error page as JSON.
                response.raise_for_status()
                payload = response.json()

                rows = []
                # A payload without "data"/"result" is treated as an empty
                # page rather than a crash.
                for group in payload.get("data") or []:
                    for record in group.get("result") or []:
                        print(record["iname"])
                        rows.append(tuple(record[key] for key in cls._FIELDS))

                if rows:
                    # One batched insert and one commit per page.
                    cursor.executemany(cls._INSERT_SQL, rows)
                    conn.commit()
        finally:
            # Release the database resources even if a request or insert fails.
            cursor.close()
            conn.close()
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    TrustBreaker.get_breaker()