import requests,re,json,os,random,time
from bs4 import BeautifulSoup
# --- Script setup: load the follower list and the request headers. ---
# Each line of followers_info.txt is a comma-separated record whose first
# field is a Zhihu member's url_token.  (Original code rebound the file
# handle `f` to its own contents -- renamed for clarity.)
with open('d://zhihu//zimei//followers_info.txt', 'r') as follower_file:
    follower_lines = follower_file.read().strip().split('\n')

# headers1.txt holds "Name: value" lines; split on the first ':' only so
# header values that themselves contain ':' survive intact.
with open('d://headers1.txt', 'r') as header_file:
    headers1 = {}
    for line in header_file.read().strip().split('\n'):
        name, value = line.strip().split(':', 1)
        headers1[name] = value

# One parsed record (list of fields) per follower.
a = [record.split(',') for record in follower_lines]
#urls=['https://www.zhihu.com/people/'+i[0]+'/following/columns' for i in a]
# File-loaded headers plus a mobile User-Agent (the explicit UA wins on clash).
headers = {**headers1, **{"User-Agent": "Opera/9.80 (Android 2.3.3; Linux; Opera Mobi/ADR-1202011015; U; en) Presto/2.9.201 Version/11.50"}}
def get_people(j):
    """Fetch the columns one Zhihu member follows and append their metadata to disk.

    j -- one follower record (list of str); j[0] is the member's url_token.

    For each followed column, appends a dict (stringified) with title,
    image_url, id, followers, intro, articles_count and author_name to
    d://zhihu//<column_id>//column_info.txt, creating the directory if
    needed.  Network or payload errors propagate to the caller.
    """
    include = ('data%5B*%5D.intro%2Cfollowers%2Carticles_count%2Cimage_url'
               '%2Cis_following%2Clast_article.created')
    base = ('https://www.zhihu.com/api/v4/members/{0}/following-columns'
            '?include={1}'.format(j[0], include))
    # Page 1 has no offset; page 2 is offset=20 (the original only ever
    # requested these two pages of 20 items each).
    column_urls_info = [base + '&limit=20', base + '&offset=20&limit=20']
    s = requests.Session()
    for url in column_urls_info:
        resp = s.get(url, headers=headers)
        # Fail fast on HTTP errors instead of an opaque JSON decode error.
        resp.raise_for_status()
        columns = json.loads(resp.content.decode('utf-8'))['data']
        for column in columns:
            # Fresh dict per column; the original reused one mutable dict,
            # which gives the same output but is fragile if keys diverge.
            column_info = {
                'title': column['title'],
                'image_url': column['image_url'],
                'id': column['id'],
                'followers': column['followers'],
                'intro': column['intro'],
                'articles_count': column['articles_count'],
                'author_name': column['author']['name'],
            }
            # exist_ok avoids the isdir()/mkdir() race of the original.
            os.makedirs('d://zhihu//%s' % column['id'], exist_ok=True)
            # Explicit utf-8 keeps Chinese text intact; the original relied on
            # the platform default encoding and errors='replace' lost characters.
            with open('d://zhihu//%s//column_info.txt' % column['id'], 'a',
                      encoding='utf-8', errors='replace') as f:
                f.write(str(column_info))
# Crawl every follower.  One member failing must not stop the whole run,
# so log the offending record and continue.  Narrowed from the original
# bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
for j in a:
    try:
        get_people(j)
    except Exception as e:
        print(j, e)
        continue
# NOTE(review): everything from here to the end of the file duplicates the
# top half of the script verbatim, so the whole crawl runs twice.  This looks
# like a copy-paste accident -- confirm and delete one copy.
from bs4 import BeautifulSoup  # re-import; already imported at the top

with open('d://zhihu//zimei//followers_info.txt', 'r') as follower_file:
    follower_lines = follower_file.read().strip().split('\n')

# "Name: value" lines; split on the first ':' only so values containing
# ':' survive intact.
with open('d://headers1.txt', 'r') as header_file:
    headers1 = {}
    for line in header_file.read().strip().split('\n'):
        name, value = line.strip().split(':', 1)
        headers1[name] = value

a = [record.split(',') for record in follower_lines]
#urls=['https://www.zhihu.com/people/'+i[0]+'/following/columns' for i in a]
headers = {**headers1, **{"User-Agent": "Opera/9.80 (Android 2.3.3; Linux; Opera Mobi/ADR-1202011015; U; en) Presto/2.9.201 Version/11.50"}}
def get_people(j):
    """Fetch the columns one Zhihu member follows and append their metadata to disk.

    NOTE(review): duplicate redefinition -- identical to the `get_people`
    defined earlier in this file (copy-paste); confirm and remove one copy.

    j -- one follower record (list of str); j[0] is the member's url_token.
    """
    include = ('data%5B*%5D.intro%2Cfollowers%2Carticles_count%2Cimage_url'
               '%2Cis_following%2Clast_article.created')
    base = ('https://www.zhihu.com/api/v4/members/{0}/following-columns'
            '?include={1}'.format(j[0], include))
    # Page 1 has no offset; page 2 is offset=20 (only two pages of 20 fetched).
    column_urls_info = [base + '&limit=20', base + '&offset=20&limit=20']
    s = requests.Session()
    for url in column_urls_info:
        resp = s.get(url, headers=headers)
        # Fail fast on HTTP errors instead of an opaque JSON decode error.
        resp.raise_for_status()
        columns = json.loads(resp.content.decode('utf-8'))['data']
        for column in columns:
            # Fresh dict per column instead of mutating one shared dict.
            column_info = {
                'title': column['title'],
                'image_url': column['image_url'],
                'id': column['id'],
                'followers': column['followers'],
                'intro': column['intro'],
                'articles_count': column['articles_count'],
                'author_name': column['author']['name'],
            }
            # exist_ok avoids the isdir()/mkdir() race of the original.
            os.makedirs('d://zhihu//%s' % column['id'], exist_ok=True)
            # Explicit utf-8 keeps Chinese text intact; the original relied on
            # the platform default encoding and errors='replace' lost characters.
            with open('d://zhihu//%s//column_info.txt' % column['id'], 'a',
                      encoding='utf-8', errors='replace') as f:
                f.write(str(column_info))
# Duplicate of the crawl loop above (the whole script is pasted twice).
# One member failing must not stop the run; narrowed from the original
# bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.
for j in a:
    try:
        get_people(j)
    except Exception as e:
        print(j, e)
        continue