文本文件中获取http链接

最新推荐文章于 2021-06-19 01:17:25 发布

aircoder

最新推荐文章于 2021-06-19 01:17:25 发布

阅读量485

点赞数

分类专栏： python 文章标签： json javascript python ViewUI

本文链接：https://blog.csdn.net/aircoder/article/details/84419797

版权

python 专栏收录该内容

16 篇文章 0 订阅

订阅专栏

#!/usr/bin/python2.7
#coding=utf-8

import sys
import MySQLdb
import time
import hashlib
import datetime
import json
import urllib2
import httplib,urllib
import zim

# Python 2 idiom: reload() brings back sys.setdefaultencoding so the
# process-wide default codec can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Remove the hook again so nothing later in the run can change the default.
del sys.setdefaultencoding
# Log the local start time; strftime() formats the current local time when
# no time tuple is passed.
print(time.strftime('%Y-%m-%d %H:%M:%S'))

# Character whitelist: every ASCII letter (each lowercase letter immediately
# followed by its uppercase form), the digits 0-9, and the punctuation
# '/', '.' and ':'.  Not referenced again in this script.
listtags = []
for i in range(ord('a'), ord('z') + 1):
    listtags.append(chr(i))
    listtags.append(chr(i).upper())
for i in range(10):
    listtags.append(str(i))
listtags.extend(['/', '.', ':'])

def visit_oapi(purl):
    """Issue a GET request to *purl* and return the raw response body.

    The response is always closed, even when reading fails.  Any exception
    raised by urllib2.urlopen() or by read() propagates to the caller.
    """
    response = urllib2.urlopen(purl)
    try:
        return response.read()
    finally:
        # Release the underlying socket instead of leaving it to the GC.
        response.close()

# SECURITY NOTE(review): the database password and the API signing key are
# hard-coded below; they should come from configuration or the environment
# instead of living in the source file.
conn = MySQLdb.connect(host='10.13.81.11', user='portal', passwd='portal@sohu',
                       db='smc_user', port=3306, charset='utf8')
try:
    cur = conn.cursor()
    try:
        # Newest 3000 passports bound to app_id 1.
        sql = "select passport from tbl_passport_bind_status WHERE app_id =1 ORDER BY id DESC LIMIT 3000"
        cur.execute(sql)
        uids = cur.fetchall()
    finally:
        cur.close()
finally:
    # fetchall() already pulled every row into memory, so the connection is
    # released before the slow HTTP loop starts.
    conn.close()

url = "http://internal.passport.sohu.com/openlogin/api/timeline/home"
appkey = 'f@JclHjuQ`DORG<f0,OqS/Q(Lpp4&G'
appid = '1106'

for row in uids:
    userid = str(row[0])
    # Whole-second Unix timestamp, sent along with the request and mixed
    # into the signature.
    ct = str(int(time.time()))
    # Request signature: md5(userid + appid + appkey + ct).
    code = hashlib.md5(userid + appid + appkey + ct).hexdigest()
    dicts = {
        'userid': userid,
        'openid': userid,
        'ct': ct,
        'code': code,
        'appid': appid,
    }

    sjson = json.dumps(dicts)
    print(sjson)
    # NOTE(review): purl is built but never used -- the GET below goes to the
    # bare url.  If the query-string form is what was intended, the JSON must
    # be URL-quoted first; confirm against the API before changing the call.
    purl = url + '?json=' + sjson
    visit_oapi(url)
    # POST the JSON payload and scan the returned timeline for links.
    req = urllib2.Request(url, sjson)
    response = urllib2.urlopen(req)
    try:
        the_page = response.read()
    finally:
        response.close()
    zim.getHttp(the_page, userid)

--------------------------------------------------

#!/usr/bin/python
#coding=utf-8

import string

# Characters that getHttp() accepts as part of a link after the "http"
# marker: every ASCII letter (each lowercase letter immediately followed by
# its uppercase form), the digits 0-9, and the punctuation '/', '.' and ':'.
listtags = []
for i in range(ord('a'), ord('z') + 1):
    listtags.append(chr(i))
    listtags.append(chr(i).upper())
for i in range(10):
    listtags.append(str(i))
listtags.extend(['/', '.', ':'])
def writeFile(userid, listlink, path="output.txt", mode="a"):
    """Write one tab-separated "userid<TAB>link" line per entry of listlink.

    userid   -- string written in the first column of every line.
    listlink -- iterable of link strings; an empty iterable writes nothing
                (the file is still created if it does not exist).
    path     -- destination file, "output.txt" by default.
    mode     -- open() mode.  The default appends, because this function is
                called once per user and truncating on every call would keep
                only the last user's links.  Pass "w" to overwrite.
    """
    # The with-block closes the file even if a write raises.
    with open(path, mode) as out:
        out.writelines(userid + '\t' + link + '\n' for link in listlink)

def getHttp(content, userid):
    """Extract "http..." candidates from content, probe them, and save hits.

    content is lowercased first, so matching is case-insensitive and the
    links that are probed and written are lowercase.  Every occurrence of
    "http" starts a candidate made of the marker plus the following run of
    characters found in listtags.  Candidates whose tail contains "www" are
    skipped; every other candidate is passed to isHttpLink() and kept when
    that returns a true value.  The kept links are handed to writeFile()
    together with userid and are also returned.

    Unlike the first version, a marker at offset 0 and a link that runs to
    the very end of content are both handled (the latter used to make the
    loop spin forever).
    """
    marker = 'http'
    content = content.lower()
    allowed = set(listtags)  # O(1) membership test per character
    total = len(content)
    listlinks = []

    start = content.find(marker)
    while start >= 0:
        tail_begin = start + len(marker)
        # Consume the run of allowed characters right after the marker.
        end = tail_begin
        while end < total and content[end] in allowed:
            end += 1
        links = content[tail_begin:end]
        print(links)
        if 'www' not in links:
            candidate = marker + links
            if isHttpLink(candidate):
                listlinks.append(candidate)
        # Resume right after this marker (not after the whole link), so a
        # nested "http" inside the consumed run is still examined, exactly
        # as the original scan did.
        start = content.find(marker, tail_begin)

    writeFile(userid, listlinks)
    return listlinks
import urllib2
import httplib,urllib
def isHttpLink(url):
    """Fetch url and report whether it landed on a known "bad link" page.

    Returns True when the response body contains either the Sohu news
    client home-page title or the "the page address you visited is wrong"
    notice, and False otherwise -- including when the request itself fails
    (bad URL, network error, HTTP error).  A status line is printed for the
    True case and for the failure case.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            # Always release the socket, even if read() fails.
            response.close()
    except Exception:
        # Deliberate best effort: any failure to fetch counts as "not a hit".
        print(' you are is error! ........... %s' % url)
        return False

    # Substring tests also match a marker at offset 0, which the former
    # "find(...) > 0" comparison missed.
    if ('<title>搜狐新闻客户端-首页</title>' in html
            or '你访问的页面地址有误' in html):
        print(' you are is bad ........... %s' % url)
        return True
    return False

if __name__ == '__main__':
    # Manual smoke test: the sample holds two "http" runs, the first cut off
    # by non-URL characters and the second by ')'.  getHttp() probes each
    # candidate over the network and writes its results via writeFile().
    stra = 'sssgsdgdfhttp://w353.5345中哦噶诺..http://sfjsdlfkjs77888)i'
    getHttp(stra, 'werw')

aircoder

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
文本文件中获取http链接

#!/usr/bin/python2.7 #coding=utf-8 import sys import MySQLdb import time import hashlib import datetime import json import urllib2 import httplib,urllib import zim reload(sys) sys...
复制链接

扫一扫

专栏目录