Python 爬取网站数据,实习生都能看懂

# This is a sample Python script.

# Press ⌃R to execute it or replace it with your code.
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.


import requests
from bs4 import BeautifulSoup

# Scrape the token table on token.unlocks.app and print one record per row.

TARGET_URL = "https://token.unlocks.app/"
REQUEST_TIMEOUT = 15  # seconds; without a timeout requests.get can block forever
MISSING = "N/A"  # printed in place of a cell that is absent from a row

# Browser-like request headers.
# NOTE: the original also sent a hard-coded "Cookie" header holding session and
# tracking cookies that belong to another site (coingecko.com). Sending those
# to this host leaks a credential and serves no purpose, so it is omitted.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    # "br" is deliberately not advertised: requests only decodes Brotli when an
    # optional brotli package is installed, otherwise the body stays compressed
    # and the UTF-8 decode below fails.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    # Referer matches the requested site so it agrees with "Sec-Fetch-Site".
    "Referer": TARGET_URL,
    "Sec-Ch-Ua": '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": "macOS",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
}

# Exact `class` attribute values of the <p> elements that hold each field.
# They are matched as whole strings, so spacing and order must stay as-is.
_P_BASE = 'font-inter text-[10px] leading-[12px] laptop:text-[12px] laptop:leading-[16px] '
_P_BLACK = _P_BASE + 'text-black dark:text-black-dark font-medium '

ROW_CLASS = 'rc-table-row rc-table-row-level-0'
NAME_CELL_CLASS = 'rc-table-cell rc-table-cell-fix-left rc-table-cell-fix-left-last'
PRICE_CLASS = _P_BLACK + 'text-left'
# The daily change is rendered with either the "danger" or the "success" class.
DAY_CLASSES = [
    _P_BASE + 'text-symmetric-danger dark:text-symmetric-dark-danger font-medium text-left',
    _P_BASE + 'text-symmetric-success dark:text-symmetric-dark-success font-medium text-left',
]
# Market cap and supply share this class string; they are told apart by position.
TABULAR_CLASS = _P_BLACK + 'tabular-nums tracking-[-1px] text-left'
UNLOCKED_CLASS = _P_BLACK + 'text-left min-w-[36px] text-right'
EMISSION_CLASS = _P_BLACK + 'text-left text-right'


def _p_text(row, css_class, index=0):
    """Return the text of the index-th <p> in *row* whose class is *css_class*.

    *css_class* is a class string or a list of alternatives, passed straight
    to BeautifulSoup. Returns MISSING when the row has no such element, so one
    malformed row cannot abort the whole scrape.
    """
    matches = row.find_all('p', {'class': css_class})
    if index < len(matches):
        return matches[index].get_text()
    return MISSING


res = requests.get(url=TARGET_URL, headers=headers, timeout=REQUEST_TIMEOUT)
# Stop on HTTP errors instead of parsing an error page as if it were data.
res.raise_for_status()
soup = BeautifulSoup(res.content.decode("utf-8"), 'html.parser')
results = soup.find_all('tr', {'class': ROW_CLASS})
for result in results:
    first_td = result.find('td', {'class': NAME_CELL_CLASS})
    if first_td is None:
        # Without the name cell there is nothing to identify the row by.
        continue
    name = first_td.get_text()
    img = first_td.find('img')
    link = img.get('src', MISSING) if img is not None else MISSING
    price = _p_text(result, PRICE_CLASS)
    day = _p_text(result, DAY_CLASSES)
    # The original looked up cap and supply with the same class string and
    # took the first match both times, so supply always repeated cap.
    # Assumes cap is the first and supply the second matching cell.
    # TODO: confirm against the live markup.
    cap = _p_text(result, TABULAR_CLASS, 0)
    supply = _p_text(result, TABULAR_CLASS, 1)
    unlocked = _p_text(result, UNLOCKED_CLASS)
    emission = _p_text(result, EMISSION_CLASS)
    print(f'Project Name: {name}\nLink: {link}\nPrice: {price}\nday: {day}\ncap: {cap}\nsupply: {supply}\nunlocked: {unlocked}\nemission: {emission}\n')
# Debugging aid: soup.prettify() returns the parsed page as indented HTML.
# See PyCharm help at https://www.jetbrains.com/help/pycharm/

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值