baidu贴吧邮箱采集小工具【写着玩】

这里写自定义目录标题

小工具写着玩

之前有一个小业务,需要通过邮箱协议进行批量群发,因此需要采集一些真实的邮箱数据。考虑到用户自己留下的邮箱基本上都是真实的,而贴吧又是一个流量比较大的地方,于是就写了一个小脚本进行测试。

工具还可以继续延伸,比如批量采集帖子,或者输入某个贴吧名后,直接把这个吧里面的所有帖子都采集一次。目前贴吧其实也有比较严格的风控,所以 cookie 需要自行解决。

import os
import re
import requests
import time
import urllib3
from bs4 import BeautifulSoup
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# Fetch one Tieba page
def get_tieba_url_request(url):
    """Download a Tieba page with browser-like headers and return its HTML.

    Args:
        url: Full URL of the page to request.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.HTTPError: If the response status code is not 200.
        requests.RequestException: Anything raised by ``requests.get``
            itself, e.g. when the 10 second timeout expires.
    """
    # The built-in cookie is the session captured when the script was
    # written; set the TIEBA_COOKIE environment variable to send your own
    # cookie instead without editing the code.
    cookie = os.environ.get('TIEBA_COOKIE') or (
        'XFI=a7d17700-8440-11ee-83ac-c1665e3fb88c; XFCS=5FE2CCE3B26175874395D9B57093E47CB9FCA2C663EE7C7C349D9D838F2993BF; XFT=NJTyvqxEhVRCgUAJ2+P0fuIaHOrdJfqa3sC+jNOG9Do=; BIDUPSID=EDDAF011D517DD5F8526B74215AB4EC9; PSTM=1688621075; BAIDUID=38E64620B89BF9ED7F4418C078F3FA7A:FG=1; BDUSS=VczYmZrT0w5aTRvb09WdDBWWHdXRlo2aFdSUzYwV3NzWFlvQURYT05EZGREUjVsSVFBQUFBJCQAAAAAAQAAAAEAAABuKrw~xrbH7su5tdm30gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF2A9mRdgPZkQ; BDUSS_BFESS=VczYmZrT0w5aTRvb09WdDBWWHdXRlo2aFdSUzYwV3NzWFlvQURYT05EZGREUjVsSVFBQUFBJCQAAAAAAQAAAAEAAABuKrw~xrbH7su5tdm30gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAF2A9mRdgPZkQ; newlogin=1; STOKEN=ddb417856b1e3da56ca796f300506e915f4ddd993c855891aab28ba8cda5b3e9; H_WISE_SIDS_BFESS=110085_264354_268593_271170_270102_274778_276533_271563_277031_277161_277355_275732_278055_272563_278575_278388_279021_279610_277757_279012_279749_279999_278415_259642_276438_280809_280557_278791_280635_281038_281235_277970_281232_281367_279203_280437_281094_281520; Hm_lvt_98b9d8c2fd6608d564bf2ac2ae642948=1698126102,1698902772,1700098710; USER_JUMP=-1; st_key_id=17; 5364263534_FRSVideoUploadTip=1; video_bubble5364263534=1; wise_device=0; H_PS_PSSID=39635_39669_39673_39664_39684_39694_39676_39678_39713_39740; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BAIDUID_BFESS=38E64620B89BF9ED7F4418C078F3FA7A:FG=1; ZFY=Wb1tH:AZ4:BZj9y2:BJUusTBjVYzyZics9VP4QaL22tPN0:C; delPer=0; PSINO=3; BAIDU_WISE_UID=wapp_1700102029361_689; arialoadData=false; H_WISE_SIDS=39635_39669_39673_39664_39684_39694_39676_39678_39713_39740; XFI=956c21e0-843c-11ee-b883-45510d578902; XFCS=A1395407BFF53914CDBA12B8F843DDC3ED65E5DBEF1285BBA053FCAA4C9F50BD; XFT=7ZvJaZXFmql3O5yIAm+Ku0cTRLQZ/UdO/82Pe+Gj2BY=; RT="z=1&dm=baidu.com&si=77dad1ea-ef2d-417f-9960-7117c29c953a&ss=lp0pt9zo&sl=x&tt=r9d&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=5xsr&nu=cebq8ru3&cl=5xe6&ul=5xh3&hd=5y6b"; '
        'tb_as_data=cf23fbb996369c3d712df0cb4cd906d2df97a42545d16b9077a3b0d9f3e79c303ad42750500d9e3e2d5395f7eefe9971cc996474adf7b3e70dcacfe2aefcece668659c3b960735aa8cb9a267be1b27ce5f92d983bd0c6e996e384378ce64b00b1d5d8f2b9aa676c5dbe966eed970df73; Hm_lpvt_98b9d8c2fd6608d564bf2ac2ae642948=1700112372; BA_HECTOR=2485812g80al85agak2l8h231ilb9vk1r; ab_sr=1.0.1_MWQ3ODdkYzdkMDhkODlkMGE1ZjhjMDFmZWE2YTI3Njc1ODQ3MGU3Y2MyNjkxMTllZDYxYThmMzMwMzViZmZhOGU3YTc1NzIyNWE2NDNiZTAzODQwMmUwYjhiMGU5OTg5MjNhMjU5OGQ2ZTY1Nzg2MjQ5ZTBkNWZkMzQ5MmQ2MWJiNzA5ZjljODdmNWU3YjU2M2FjZjUxMGQ4NDVmMWIyMjNhZTMxNDA3ZTY4ZTQ2MDdmYTlkMzljNmQ2ZjRmYzhm; st_data=2280640e7dd502f250f770692dbb46081a224de35ef3b6f396493a615f071feb338a07deb489b09ec58afd7e0024375cc52019d6f4fdb22dd5abdaa6cb1ed526127793dac697c20951d94eb99604ab75e8ec3baddb3041b57ee5809b35bf413327e095eda8ab467f0e5bdbff52f13ecaf10734b5a452918eab2a6a2fe8f88dc74fb65af3e23ccf090b2e4b9933939ff8; st_sign=b4d2cd91'
    )
    headers = {
        'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language':'zh-CN,zh;q=0.9',
        'Connection':'keep-alive',
        'Cookie':cookie,
        'Host':'tieba.baidu.com',
        'Referer':'https://tieba.baidu.com/f?ie=utf-8&kw=%E5%A4%96%E8%B4%B8%E8%8B%B1%E8%AF%AD&fr=search',
        'Upgrade-Insecure-Requests':'1',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
    }
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this request; kept as in the original script.
    res = requests.get(url=url,headers=headers,verify=False,timeout=10)
    if res.status_code != 200:
        # Fail loudly here instead of reaching `return` with nothing to return.
        raise requests.HTTPError(
            f'unexpected HTTP status {res.status_code} for {url}',
            response=res,
        )
    print(f'{url}访问成功>>>')
    return res.text

# Read the thread's page count
def get_page(html_data):
    """Extract the page-count text of a thread from its HTML.

    Args:
        html_data: HTML text of a thread page.

    Returns:
        The text of the page-count element exactly as it appears in the
        document (a string; the caller converts it with ``int``).

    Raises:
        IndexError: If the page-count element is not present in the
            document.
    """
    soup = BeautifulSoup(html_data,'lxml')
    node = soup.select_one('#thread_theme_5 > div.l_thread_info > ul > li:nth-child(2) > span:nth-child(2)')
    if node is None:
        # Same exception type the unguarded `[0]` lookup produced, but with
        # a message that says what is actually missing.
        raise IndexError(
            'page-count element not found in the thread HTML '
            '(unexpected page layout or not a thread page)'
        )
    page = node.get_text()
    print(f'页数一共=>{page}')
    return page

# Visit every page of the thread
def get_tieba_page_request(url,page,delay=5):
    """Fetch each page of a thread and collect the e-mail addresses on it.

    Pages are requested as ``url + '?pn=<n>'`` for n = 1 .. ``page``
    inclusive. The addresses found on each page are printed as a list.

    Args:
        url: Base URL of the thread (without the ``pn`` query parameter).
        page: Total number of pages; anything ``int()`` accepts.
        delay: Seconds to sleep between two consecutive page requests.

    Returns:
        A list with every address found, in page order. Duplicates are
        kept.
    """
    # ASCII-only character classes so neighbouring non-ASCII text is never
    # glued onto an address; dots are allowed in the local part and the
    # domain may have several labels.
    email_pattern = re.compile(
        r'[A-Za-z0-9][A-Za-z0-9._%+-]*@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.[A-Za-z]{2,}'
    )
    last_page = int(page)
    found = []
    # +1 because range() excludes its stop value and the last page must be
    # visited too.
    for page_no in range(1,last_page + 1):
        html_data = get_tieba_url_request(url+'?pn=' + str(page_no))
        # Turn literal \uXXXX sequences in the markup into characters;
        # errors='replace' keeps a malformed sequence from aborting the run.
        html_data = html_data.encode('utf-8').decode('raw_unicode_escape',errors='replace')
        email_list = email_pattern.findall(html_data)
        print(email_list)
        found.extend(email_list)
        if page_no < last_page:
            # Pause only when another request follows.
            time.sleep(delay)
    return found


if __name__ == '__main__':
    # Script entry point: download the thread's first page, read the page
    # count from it, then walk the pages and print the addresses found.
    thread_url = 'https://tieba.baidu.com/p/7371997'
    first_page_html = get_tieba_url_request(thread_url)
    get_tieba_page_request(thread_url, get_page(first_page_html))
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
好的,以下是一个简单的Java Swing实现的百度贴吧GUI示例代码: ```java import javax.swing.*; import java.awt.*; public class BaiduTiebaGUI extends JFrame { public BaiduTiebaGUI() { super("百度贴吧"); // 设置窗口大小和位置 setSize(400, 600); setLocationRelativeTo(null); // 创建主面板 JPanel mainPanel = new JPanel(new BorderLayout()); // 创建顶部搜索栏 JPanel searchPanel = new JPanel(new BorderLayout()); JTextField searchField = new JTextField(); JButton searchButton = new JButton("搜索"); searchPanel.add(searchField, BorderLayout.CENTER); searchPanel.add(searchButton, BorderLayout.EAST); // 创建中间帖子列表 JPanel postListPanel = new JPanel(new GridLayout(20, 1)); for (int i = 1; i <= 20; i++) { String title = "帖子" + i; String author = "作者" + i; String date = "2021-01-01"; String content = "这是第" + i + "个帖子的内容。"; JPanel postPanel = new JPanel(new BorderLayout()); postPanel.setBorder(BorderFactory.createLineBorder(Color.GRAY)); JLabel titleLabel = new JLabel(title); JLabel authorLabel = new JLabel(author); JLabel dateLabel = new JLabel(date); JTextArea contentTextArea = new JTextArea(content); contentTextArea.setEditable(false); postPanel.add(titleLabel, BorderLayout.NORTH); postPanel.add(authorLabel, BorderLayout.WEST); postPanel.add(dateLabel, BorderLayout.EAST); postPanel.add(contentTextArea, BorderLayout.CENTER); postListPanel.add(postPanel); } // 创建底部分页栏 JPanel pagePanel = new JPanel(new FlowLayout(FlowLayout.CENTER)); for (int i = 1; i <= 10; i++) { JButton pageButton = new JButton(String.valueOf(i)); pagePanel.add(pageButton); } // 将组件添加到主面板 mainPanel.add(searchPanel, BorderLayout.NORTH); mainPanel.add(postListPanel, BorderLayout.CENTER); mainPanel.add(pagePanel, BorderLayout.SOUTH); // 将主面板添加到窗口中 add(mainPanel); // 设置窗口可见性 setVisible(true); } public static void main(String[] args) { new BaiduTiebaGUI(); } } ``` 运行该代码,可以看到一个简单的百度贴吧GUI界面,其中包含了顶部的搜索栏、中间的帖子列表和底部的分页栏。你可以根据实际需求修改代码和界面样式,使其更符合你的要求。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值