huaweitman的专栏

专注于数据结构、算法、c/c++、python、数据挖掘等

unbalanced parenthesis




#!/usr/bin/env python
#-*- coding: utf-8  -*-
import urllib 
import urllib2 
import random 
import requests
import os,sys 
import MySQLdb
from sgmllib import SGMLParser 
from BeautifulSoup import BeautifulSoup
import re
num = 0  # index of the next cover image to save; doubles as its file name


def main():
    """Scrape one Google Play details page.

    Downloads the page at the hard-coded URL, re-serialises it with
    BeautifulSoup and pulls the ``itemprop`` fields out with regular
    expressions.  Every app-name match and the first version match are
    printed, and each cover image found is written to
    ``googlemarket/<num>``, with the module-level ``num`` incremented
    after every file so successive images do not overwrite each other.

    The original draft also inserted the fields into a MySQL ``address``
    table; that code was commented out and is not reproduced here.

    Returns:
        dict mapping a field label to the list of strings ``re.findall``
        captured for it (an empty list when the page has no such field).
    """
    global num

    # NOTE(review): the query string looks like two URLs pasted together;
    # kept byte-for-byte because the intended value cannot be confirmed here.
    url = "https://play.google.com/store/apps/details?id=com.androidesk&hl=zh_CNhttps%3A%2F%2Fplay.google.com%2Fstore%2Fapps%2Fdetails%3Fid%3Dcom.androidesk"
    page = requests.get(url)
    markup = BeautifulSoup(page.content).prettify("utf-8")

    # Raw strings so that \s and \S reach the regex engine untouched.
    field_patterns = {
        "name": r'itemprop="name">[\s\S]*?<div>([\s\S]*?)</div>',
        "developer": r'itemprop="name">([\s\S]*?)</a>',
        "version": r'itemprop="softwareVersion">([\s\S]*?)</div>',
        "pubtime": r'itemprop="datePublished">([\s\S]*?)</div>',
        "filesize": r'itemprop="fileSize">([\s\S]*?)</div>',
        "support": r'itemprop="operatingSystems">([\s\S]*?)</div>',
        "introduction": r'itemprop="description">[\s\S]*?<div>([\s\S]*?)</div>',
    }
    fields = {}
    for label, regex in field_patterns.items():
        fields[label] = re.findall(regex, markup)

    # Console output matches the original script: all names, first version.
    for app_name in fields["name"]:
        print(app_name)
    if fields["version"]:
        # Guarded: indexing [0] on an empty match list would raise IndexError.
        print(fields["version"][0])

    image_dir = "googlemarket"
    cover_urls = re.findall(r'<img class="cover-image" src="(.+?)" alt', markup)
    if cover_urls and not os.path.isdir(image_dir):
        os.makedirs(image_dir)
    for cover in cover_urls:
        # NOTE(review): the [1:-2] slice is carried over from the original;
        # it drops the first and the last two characters of the captured
        # src value -- confirm against real page markup that this is wanted.
        image = requests.get(cover[1:-2])
        # Binary mode plus a context manager: the payload is image bytes and
        # the handle must be closed even if the write fails.
        with open(os.path.join(image_dir, str(num)), "wb") as out:
            out.write(image.content)
        num += 1

    return fields
	
			
		
				
	
    
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()



Traceback (most recent call last):
  File "crawler0729.py", line 85, in <module>
    main()
  File "crawler0729.py", line 56, in main
    pattern=re.compile('itemprop="description">[\s\S]*?<div>"([\s\S]*?)"</div>')
  File "/usr/lib/python2.7/re.py", line 190, in compile
    return _compile(pattern, flags)
  File "/usr/lib/python2.7/re.py", line 242, in _compile
    raise error, v # invalid expression
sre_constants.error: unbalanced parenthesis


阅读更多
个人分类: python
想对作者说点什么? 我来说一句

Parenthesis

Parenthesis

logch logch

2016-09-03 23:00:51

阅读数:1446

没有更多推荐了,返回首页

加入CSDN,享受更精准的内容推荐,与500万程序员共同成长!
关闭
关闭