# Polls a forum board and prints newly posted threads (original note: 刷某网站新贴,呵呵哒).
import os,sys,types
import time,requests,json
from selenium import webdriver
from bs4 import BeautifulSoup
def gettime():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    # strftime() uses the current local time when no time tuple is given,
    # so the explicit localtime(time()) round trip is unnecessary.
    return time.strftime('%Y-%m-%d %H:%M:%S')
# href values that HostLoc() has already reported; used to suppress repeats.
# Entries are only ever appended in the visible code, so the list grows for
# the lifetime of the process.
HOSTURLS = []
def HostLoc():
    """Fetch the forum listing page once and print links not seen before.

    Every ``<a class="s xst">`` anchor on the page is inspected. An anchor
    whose ``href`` is not yet in the module-level ``HOSTURLS`` list is
    recorded there and printed as ``<timestamp> <absolute url> <text>``,
    with the text cut to at most 250 characters. Anchors with a missing or
    empty ``href`` are skipped.

    The call is best-effort: any ordinary error (network failure, HTTP error
    status, unexpected markup, console encoding problems) is reported on
    stderr and the function returns, so a polling caller keeps running.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught, so
    the poller can still be stopped while a request is in flight.
    """
    # NOTE(review): the host part looks like a redacted placeholder ('*');
    # confirm the real hostname before relying on this.
    base = "https://*.com/"
    url = "https://*.com/forum-45-1.html"
    heads = {"user-agent": "firefox"}
    try:
        r = requests.get(url, headers=heads, timeout=5)
        # Surface HTTP errors instead of silently parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        for item in soup.find_all("a", class_="s xst"):
            href = item.get("href")
            if not href or href in HOSTURLS:
                continue
            HOSTURLS.append(href)
            # Normally the element right after the <a> tag is its text node.
            # If it is a nested tag (or nothing), fall back to the anchor's
            # full text rather than failing on the slice.
            title = item.next_element
            if not isinstance(title, str):
                title = item.get_text()
            print(gettime(), base + href, title[:250])
    except Exception as exc:
        # Keep the poller alive, but make the failure visible.
        print(gettime(), "HostLoc failed:", repr(exc), file=sys.stderr)
if __name__ == '__main__':
    # Poll forever: print a separator line, wait, then check for new links.
    separator = "*" * 60
    pause_seconds = 5
    while True:
        print(separator)
        time.sleep(pause_seconds)
        HostLoc()