记得还是7月下旬的时候,无意中看到了 pyquery 库。它能用来做爬虫,而且不需要写正则表达式,还是蛮实用的。于是简单地写了一些代码来抓取网络数据。我装的是 Anaconda,默认是 Python 2.7 版本,代码如下:
from pyquery import PyQuery as pq
import pandas as pd
from datetime import datetime
import MySQLdb
# Scrape the listing pages mai_1.html .. mai_9.html and collect, for every
# table row, the text of the first cell (code), the text of the second cell
# (name) and the comma-joined texts of the links found in the sixth cell
# (presumably industry names -- confirm against the page layout).
# The three lists stay at module level because test_insert() reads them.
shars_tabls = pd.DataFrame()  # not read anywhere in the visible code; kept in case later code uses it
code = []
name = []
indname = []
for page in range(1, 10):
    urlxx = "http://yunvs.com/list/mai_" + str(page) + ".html"
    v_source = pq(url=urlxx)
    for row in v_source('tr'):
        # Select the row's cells once instead of re-querying them per column.
        cells = pq(row).find('td')
        v_code = cells.eq(0).text()
        v_name = cells.eq(1).text()
        # Iterate the links directly; the loop variable is deliberately
        # distinct from the page counter of the outer loop.
        links = cells.eq(5).find('a')
        x = ','.join(pq(link).text() for link in links)
        code.append(v_code)
        name.append(v_name)
        indname.append(x)

data = {'v_code': code, 'v_name': name, 'v_ind': indname}
frame = pd.DataFrame(data)
# Drop rows whose name is empty (presumably header rows, which have no <td>
# cells) before exporting to Excel.
frame1 = frame[frame.v_name != ""]
frame1.to_excel("D:\\Users\\zhoumeixu204\\Desktop\\shearstable.xls", encoding="utf-8", index=False)
def test_create():
    """Create the ``frame`` table in the local ``dataframe`` database.

    The statement uses ``create table if not exists``, so calling this
    function again when the table already exists is harmless.

    Raises:
        MySQLdb.Error: if connecting or executing the statement fails.
            The cursor and the connection are closed in that case as well.
    """
    sql = "create table if not exists frame(code char(60) ,name varchar(500),indname varchar(500)) ENGINE=InnoDB DEFAULT CHARSET=utf8"
    conn = MySQLdb.connect(host="localhost", user="root", passwd="", db="dataframe")
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
        finally:
            cursor.close()
        conn.commit()
    finally:
        # close() must actually be called; a bare ``conn.close`` only
        # references the method and leaves the connection open.
        conn.close()
# Make sure the destination table exists before any rows are inserted.
test_create()
def test_insert():
    """Bulk-insert the scraped rows into the ``frame`` table and report timing.

    Reads the module-level lists ``code``, ``name`` and ``indname``, pairs
    them up row by row and inserts them with a single ``executemany`` call.
    The start time and the total elapsed time are printed to stdout.

    Raises:
        MySQLdb.Error: if connecting or inserting fails.  Nothing is
            committed in that case; the cursor and connection are closed.
    """
    dstart = datetime.now()
    # print() with a single argument behaves the same on Python 2 and 3.
    print("progaram start at" + dstart.strftime("%Y-%m-%d %H:%M:%S"))
    sql = "insert into frame(code,name,indname) values(%s,%s,%s)"
    # Materialise the rows: on Python 3 zip() is a lazy iterator, which is
    # always truthy and so defeats executemany's empty-input short-circuit.
    # NOTE(review): unlike the Excel export, rows with an empty name are not
    # filtered out here -- confirm whether that is intended.
    temp = list(zip(code, name, indname))
    conn = MySQLdb.connect(host="localhost", user="root", passwd="", db="dataframe", use_unicode=True, charset="utf8")
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(sql, temp)
        finally:
            cursor.close()
        conn.commit()
    finally:
        # close() must actually be called; a bare ``conn.close`` only
        # references the method and leaves the connection open.
        conn.close()
    dend = datetime.now()
    print("the time now is %s,total cost %s" % (dend.strftime("%Y-%m-%d %H:%M:%S"), dend - dstart))
# Load the scraped rows into MySQL, then report completion.
test_insert()
print("success")