披上神装去打怪刷副本,站着撸,尤其像龙之谷这样需要大量刷副本和巢穴的游戏
再如打王者荣耀,咬牙切齿,不会因为铭文差一点攻击而无法击杀对面英雄
而seo这样无节操的工作,想要快速提出需求,解决问题,提升流量,懂一门脚本语言会更加如鱼得水
当然seo圈很多鲜活的例子证明,只会火车头以及cms也能年入几十万,油费还是很够的
最后学啥语言好,当然是python,理由是什么,学python还需理由?我也不知道在说啥
不细分、不谈需求,只记录一下工作中python在seo中的应用!或者说懂技术的seo能折腾点啥
遍历同目录下所有xml文件并提取字段,拼接url
#encoding=utf-8
import os,re
def panduan(num):
    """Zero-pad a calendar field (month/day) to two digits.

    Accepts an int or a numeric string and returns a string:
    7 -> '07', '12' -> '12'. Negative values are not expected
    (inputs come from a YYYY/MM/DD date split).
    The original if/else padding is replaced by '%02d' formatting,
    which is identical for all non-negative inputs.
    """
    return '%02d' % int(num)
# For every .xml file in the current directory, extract (ID, directory, date)
# triples and print the article URL built from them.
# NOTE(review): the regex below appears corrupted -- the XML tag names that
# originally delimited the three (.*?) capture groups were stripped when this
# snippet was published. Restore the literal tags before running.
data = re.compile(r'(.*?)[\s\S]*?(.*?)[\s\S]*?(.*?)')  # hoisted: compile once, not per file
for xml in os.listdir('.'):
    if not xml.endswith('.xml'):  # endswith: don't match names like a.xml.bak
        continue
    with open(xml) as fh:  # original leaked the file handle via open(xml).read()
        content = fh.read()
    for ID, mulu, stamp in re.findall(data, content):
        # stamp looks like "YYYY/MM/DD ..." -- keep only the date part
        date_part = stamp.split(' ')[0]
        years, months, days = date_part.split('/')[:3]
        shijianchuo = years + panduan(months) + panduan(days)
        # Original wrote  print ('...')%(...)  which only works as a Python 2
        # print statement; the %-formatting now happens inside the call, so
        # this line behaves the same on Python 2 and 3.
        print('https://www.xxxxxx.com/%s/%s/ems%s.html' % (mulu, shijianchuo, ID))
定期更新采集
#coding:utf-8
import urllib2,re,pycurl,StringIO,sys,lxml,requests,time
from bs4 import BeautifulSoup
# Incremental sitemap collector: fetch the sitemap page, extract article
# links, and append URLs not already recorded in url.txt.
# NOTE(review): Python 2 code (print statement, u'' literals, dict.has_key);
# the original indentation was lost in publication and is reconstructed here.
str_time=time.strftime('%Y-%m-%d',time.localtime())  # today's date, used only in the log output
op_txt=open('url.txt','a')  # append handle for newly discovered URLs
url = 'https://www.pincai.com/sitemap/group1.htm'
html=requests.get(url).content
soup = BeautifulSoup(html,"lxml")
# zidian ("dictionary") maps already-seen URL -> line index; effectively a set
zidian={}
c=0
with open('url.txt') as f:
    for i in f.readlines():
        i=i.strip()
        zidian['%s'%(i)]=c
        c+=1
# NOTE(review): the regex below is corrupted -- the HTML tag that preceded
# href was stripped when this snippet was published, leaving a raw string
# broken across a line break. Reconstruct the original pattern before running.
for urllist in re.findall(re.compile(r'
.*?href="(.*?)" target="_blank">(.*?)'),str(soup)):
    url_data=urllist[0].strip()
    title=urllist[1]
    if '2017' in title:  # only articles whose anchor text mentions 2017
        print title,url_data
        if zidian.has_key(url_data):
            print u'没有更新'+str_time
            continue
        else:
            print u'成功更新'+str_time
            op_txt.writelines('%s\n'%url_data)
# Commented-out variant for another site (its regex is corrupted the same way):
# url="https://www.kanzhun.com/k-news/"
# html=urllib2.urlopen(url).read()
# #print html
# for urllist in re.findall('
(.*?)',html):# #print urllist[0],urllist[1]
# if '春节' in urllist[1]:
# print urllist[1],urllist[0]
v1版本:在品牌关键词前批量插入介词for(词库改写脚本)
# -*- coding: utf-8 -*-
#build by bigwayseo.com
import time
import sys
# Python 2 hack: force utf-8 as the default str<->unicode codec so the
# Chinese/encoded keywords can be mixed with byte strings without explicit
# encode/decode calls. (reload(sys) + setdefaultencoding exist only on Py2.)
reload(sys)
sys.setdefaultencoding('utf8')
# Phone model words: if one of these immediately precedes a brand keyword,
# NaiveFilter.filter keeps the pair together inside the inserted "for ..." phrase.
l=['iphone','samsung','sony','galaxy','xperia']
op_txt=open('done.txt','a')  # rewritten phrases are appended here, one per line
class NaiveFilter():
    """Keyword rewriter: inserts the preposition "for" in front of keywords
    loaded via parse(), unless "for <keyword>" is already present.  When the
    keyword is preceded by a phone model word from the module-level list ``l``
    (e.g. "iphone"), the model word is kept inside the inserted phrase.

    NOTE(review): Python 2 code (unicode(), print statement, implicit
    bytes/unicode comparison).  The original indentation was lost in
    publication; the nesting below is a best-effort reconstruction --
    verify against the author's intent before relying on it.
    """
    def __init__(self):
        # keywords loaded by parse(); stored lowercased / utf-8 encoded
        self.keywords = set([])
    def parse(self, path):
        """Load one keyword per line from *path* into self.keywords."""
        for keyword in open(path):
            self.keywords.add(keyword.strip().encode('utf-8').lower())
        # print self.keywords
    def filter(self, message):
        """Rewrite *message* (at most one keyword) and append it to done.txt."""
        # apple doubles as a "replacement made" flag: set to 'no' after the
        # first rewrite, which stops scanning further keywords.
        apple='yes'
        message = unicode(message).lower()
        for k in self.keywords:
            replss=r"for %s"%k
            if k in message and replss not in message:
                for i in l:
                    c=i+' '+k  # model word + keyword, e.g. "iphone case"
                    if c not in message:
                        message=message.replace(k, replss)
                    else:
                        # keep the model word inside the inserted phrase:
                        # "... iphone case" -> "... for iphone case"
                        f=r'for %s'%c
                        message=message.replace(k, f)
                        print message
                    apple='no'
                    break  # only the first model word is ever considered
                if apple=='no':
                    break
        op_txt.write('%s\n'%message)
        # print message
if __name__ == '__main__':
    f = NaiveFilter()
    # brands.txt: one brand / unwanted keyword per line (loaded as the filter set)
    f.parse("brands.txt")
    # word.txt: phrases to rewrite, one per line.  The original collected the
    # lines into a list and looped `for i in range(len(a)): f.filter(a[i])`;
    # iterating the stripped lines directly is equivalent and idiomatic.
    for word in [line.strip() for line in open('word.txt').readlines()]:
        f.filter(word)
下载sitemap的压缩文件
#encoding=utf-8
import requests
print "downloading with requests"
for num in range(2,1018):
url = 'https://www.xxxxx.com/s/baidu_sitemap%d.txt.gz'%num
r = requests.get(url)
with open("C:\Users\Administrator\Desktop\sitemap\sitemap%d.zip"%num, "wb") as code:
code.write(r.content)
合并日志文件
#coding=utf-8
import os
import sys
import glob
def dirTxtToLargeTxt(dir, outputFileName):
    """Concatenate every *.txt file directly under *dir* into *outputFileName*.

    Opens the output in append mode ("ab"), so repeated runs keep growing the
    file -- same behavior as the original.  Returns True on success, False
    when *dir* is not a directory.  (The parameter name ``dir`` shadows the
    builtin but is kept unchanged for backward compatibility with callers.)
    """
    # Guard: reject non-directory input, matching the original error message.
    if not os.path.isdir(dir):
        print("传入的参数有错%s不是一个目录" % dir)
        return False
    # with-blocks fix two leaks in the original (neither handle was closed);
    # "ab" matches the binary reads and avoids the bytes-into-text-handle
    # mismatch the old code would hit on Python 3.
    with open(outputFileName, "ab") as outputFile:
        for txtFile in glob.glob(os.path.join(dir, "*.txt")):
            print(txtFile)  # progress log: one line per merged file
            with open(txtFile, "rb") as inputFile:
                for line in inputFile:
                    outputFile.write(line)
    return True
if __name__ =="__main__":
if len(sys.argv) < 3:
print "Usage:%s dir outputFileName" %sys.argv[0]
sys.exit()
dirTxtToLargeTxt(sys.argv[1],sys.argv[2])
重命名一个目录下所有文件夹下的文件名
#encoding=utf-8
import os,sys
# Python 2 hack so Chinese file names survive implicit str<->unicode mixing.
reload(sys)
sys.setdefaultencoding('utf-8')
# path = 'C:\Users\Administrator\Desktop\image\\'
# For every sub-folder of ...\Desktop\image, rename the files inside it to
# "<folder name>(1).jpg", "<folder name>(2).jpg", ...
# NOTE(review): Python 2 only -- '\U' in these non-raw path literals is an
# invalid escape on Python 3, and n.decode('gbk') assumes os.listdir returns
# GBK byte strings (Windows/Py2 behavior).  Also assumes every file is a .jpg.
for i in os.listdir('C:\Users\Administrator\Desktop\image'):
    f=1  # per-folder counter embedded in the new file name
    img_dir='C:\Users\Administrator\Desktop\image\%s\\'%i
    # print img_dir
    for n in os.listdir(img_dir):
        pic_name=n.decode('gbk') # .decode('gbk')
        new_name=img_dir+i+'(%s).jpg'%f
        path=img_dir+pic_name
        print path
        os.rename(path, new_name)
        f+=1
print u"重命名成功"
关键词去重
# Keyword de-duplication: collect the keywords already present in wen1.txt,
# then print the candidates not yet recorded.  (range(1, 10) looks like a
# placeholder candidate list -- the digits "1".."9".)
# A set replaces the original dict-with-unused-counter, `in` replaces
# dict.has_key (removed in Python 3), and the file handle is closed via with.
seen = set()
with open('wen1.txt') as fh:
    for line in fh:
        seen.add(line.strip())
for candidate in range(1, 10):
    candidate = str(candidate)
    if candidate not in seen:
        print(candidate)
待续…