如果你有其他编程基础的话,学习Python肯定不会太难。我的本命是Java,但是Java我也是个渣,几乎只会增删改查,做过最难的也是增删改查。不过基本功能模块以及上级交代的任务,我还是可以完成的。最近因为工作需要,我开始学习Python,贴一下自己的代码,我用的是Python 3.7。
import urllib.request

url = "http://www.baidu.com/"
# Open the URL. The HTTP response body is a one-shot stream: calling
# read() a second time (or readlines() after read()) returns nothing.
# The original code read the stream at the first print and then wrote an
# empty baidu1.html — so read once, keep the bytes, and reuse them.
response = urllib.request.urlopen(url)
body = response.read()  # raw bytes; body.decode() gives a str (utf-8 default)
print(body)
# URL that was actually fetched (after any redirects)
print(response.geturl())
# Headers come back as a list of (name, value) pairs; dict() makes them readable
print(dict(response.getheaders()))
# HTTP status code
print(response.getcode())
# Line-by-line view (bytes). Split the cached bytes instead of calling
# response.readlines(), which would be empty after read() above.
print(body.splitlines(True))
# Save as text: decode the bytes and open with 'w'
# with open('baidu.html', 'w', encoding='utf8') as fp:
#     fp.write(body.decode())
# Save the raw bytes: 'wb' opens in binary mode and creates the file
with open('baidu1.html', 'wb') as fp:
    fp.write(body)

# Image download demo
image_url = "https://ss0.bdstatic.com/94oJfD_bAAcT8t7mm9GUKT-xh_/timg?image&quality=100&size=b4000_4000&sec=1562431650&di=be95dba31e377e497b6c5914ada7bd33&src=http://upload4.95171.cn/pic/AESH10001446/4655.jpg"
response = urllib.request.urlopen(image_url)
print(response)
# Images are binary data, so the file must be opened in 'wb' mode
with open('mienv.jpg', 'wb') as pf:
    pf.write(response.read())
# Second way to save: urlretrieve(url, filename) does the fetch + write in one call
urllib.request.urlretrieve(image_url, 'chi.jpg')
刚开始感觉python好简单啊,而且兴趣贼强,写着写着发现好难,但是跟Java比起来代码确实要少很多,比如连接MySQL直接导入pymysql,用 pip install pymysql 命令导包,写几行代码就ok。但是越往后越感觉难,应该是我的代码量太少了,http协议什么的还有html标签class都需要了解,后期反爬更难,不过我肯定要学习的,java~python共存,不管多难一定要学习。我感觉我的学习方法还是不行,靠的是走量来完成的,写的多了才能记住,也是因为这点让我发现了我是个普通人啊,IQ没有太高,记忆力也不行,所以要更加努力,加油,学习多少记多少,兴趣很重要,完成一点东西自己就感觉好骄傲。IT行业路难走,得一直追寻着新的技术的学习、追寻,我也想学习,但是路很迷茫,无法规划自己的路怎么走,学习东西三天两头给落下,自学两三天就不想学了,就想学别的,技术诱惑太多,我无法辨别哪是我的路,我特别希望我的技术路上有一位可以带我走出迷雾的大佬,我是真的在迷雾里面。对我而言实际开发了才有冲劲,才有解决一切困难的status,学习之路很漫长,也很遥远,希望我的道路上能碰到一位能带我走出迷雾的老师。在学校里是学生,出来社会是职场人,我想我的职场之路有老师以及一起学习的同学。写点鸡汤安慰一下自己:哎,写不出来,算了。希望能碰见一起学习、互相帮忙、可以比拼的人。
贴一下最近写的Python爬虫,爬取百度贴吧会员列表:应该是可以直接用的。
"""
Python写的百度贴吧工具
"""
import pymysql
host = 'localhost'
db_name = 'test'
username = 'root'
password = 'Admin@123'
def _get_connection(host, username, password, db_name):
    """Open and return a pymysql connection to ``db_name`` using utf8mb4."""
    params = {
        'host': host,
        'user': username,
        'password': password,
        'charset': 'utf8mb4',
        'db': db_name,
    }
    return pymysql.connect(**params)
def _insert_table(connection, username):
insert_table_sql = """
INSERT INTO tieba_bing
VALUES(%s)"""
with connection.cursor() as cursor:
cursor.execute(insert_table_sql, (username))
connection.commit()
import urllib.request as request
from bs4 import BeautifulSoup
import re
import log_config
import logging
logger = logging.getLogger()  # root logger; log_config (imported above) attaches its handlers
# Tieba serves these member-list pages in GBK, not UTF-8.
encoding = 'GBK'
# word= is the percent-encoded (GBK) name of the target tieba.
base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%BB%AC%B1%F9'
# base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%B9%FD%C1%CB%BC%B4%CA%C7%BF%CD'
start_page = 1
total_pages = None  # filled in by _get_total_pages()
# Single shared connection, opened at import time.
connection = _get_connection(host, username, password, db_name)
def _get_total_pages():
    """Fetch the first member page and store the page count in ``total_pages``."""
    global total_pages
    raw = request.urlopen(base_url).read()
    document = BeautifulSoup(raw.decode(encoding), 'lxml')
    total_span = document.find('span', class_='tbui_total_page')
    # The span text looks like '共12页'; capture the digits.
    matched = re.match(r'共(\d+)页', total_span.string)
    total_pages = int(matched.group(1))
    logger.info(f'会员共{total_pages}页')
def _find_all_users():
    """Walk every member page and insert each user name into the database.

    Requires ``_get_total_pages()`` to have run first so ``total_pages`` is set.
    """
    for i in range(start_page, total_pages + 1):
        target_url = f'{base_url}&pn={i}'
        logger.info(f'正在分析第{i}页')
        html = request.urlopen(target_url).read().decode(encoding)
        soup = BeautifulSoup(html, 'lxml')
        outer_div = soup.find('div', class_='forum_info_section member_wrap clearfix bawu-info')
        inner_spans = outer_div.find_all('span', class_='member')
        # Start numbering at 1 so the log message reads naturally.
        for index, span in enumerate(inner_spans, 1):
            name_link = span.find('a', class_='user_name')
            name = name_link.string
            logger.info(f'已找到 {name}')
            try:
                _insert_table(connection, name)
            except Exception:
                # logger.exception records the traceback; the original message
                # had garbled word order ('{index}第个') and discarded the
                # exception object entirely.
                logger.exception(f'第{i}页第{index}个用户 {name} 发生异常')
import datetime  # NOTE(review): unused in this script — candidate for removal
if __name__ == '__main__':
    # Determine the page count first, then crawl every page.
    _get_total_pages()
    _find_all_users()
import logging

# log_config: configures the root logger on import.
# Console shows everything from DEBUG up; log.log records only ERROR and above.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# One shared message format for both destinations.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Console handler.
consoleHandler = logging.StreamHandler()
consoleHandler.setLevel(logging.DEBUG)
consoleHandler.setFormatter(formatter)
logger.addHandler(consoleHandler)

# File handler (appends to log.log).
fileHandler = logging.FileHandler('log.log', mode='a', encoding='UTF-8')
fileHandler.setLevel(logging.ERROR)
fileHandler.setFormatter(formatter)
logger.addHandler(fileHandler)
还有爬取百度贴吧头像的:这个是爬取"头像吧"会员的头像并保存到本地文件夹里面的。
"""
Python写的百度贴吧爬取用户头像工具
"""
import pymysql
host = 'localhost'
db_name = 'test'
username = 'root'
password = 'Admin@123'
def _get_connection(host, username, password, db_name):
    """Open a utf8mb4 pymysql connection to the given MySQL database."""
    return pymysql.connect(
        host=host, user=username, password=password,
        charset='utf8mb4', db=db_name)
# def _insert_table(connection, username):
# insert_table_sql = """
# INSERT INTO tieba_bing
# VALUES(%s)"""
# with connection.cursor() as cursor:
# cursor.execute(insert_table_sql, (username))
# connection.commit()
import urllib.request as request
from bs4 import BeautifulSoup
import re
import log_config
import logging
import requests
import os
logger = logging.getLogger()  # root logger; log_config (imported above) attaches its handlers
# Tieba serves these member-list pages in GBK, not UTF-8.
encoding = 'GBK'
# word= is the percent-encoded (GBK) name of the target tieba ('头像' here).
base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%CD%B7%CF%F1'
# base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%B9%FD%C1%CB%BC%B4%CA%C7%BF%CD'
start_page = 1
total_pages = None  # filled in by _get_total_pages()
# Opened at import time; unused here since _insert_table is commented out above.
connection = _get_connection(host, username, password, db_name)
# Destination folder for downloaded avatar images.
file_path='D:/book/img1'
def _get_total_pages():
    """Read the member-list front page and record the total page count."""
    global total_pages
    page_bytes = request.urlopen(base_url).read()
    soup = BeautifulSoup(page_bytes.decode(encoding), 'lxml')
    span = soup.find('span', class_='tbui_total_page')
    # The span's text has the form '共12页'; extract the number of pages.
    counter = re.compile(r'共(\d+)页')
    total_pages = int(counter.match(span.string).group(1))
    logger.info(f'会员共{total_pages}页')
def strip(path):
    """Remove characters that are illegal in Windows file names.

    Covers the full reserved set ``\\ / : * ? " < > |``. The original
    character class listed ``/`` twice and omitted ``|``.

    :param path: any value; it is converted with str() first
    :return: the cleaned string
    """
    return re.sub(r'[\\/:*?"<>|]', '', str(path))
def _find_all_users():
    """Walk every member page and download each member's avatar to ``file_path``.

    Requires ``_get_total_pages()`` to have run first so ``total_pages`` is set.
    """
    for i in range(start_page, total_pages + 1):
        target_url = f'{base_url}&pn={i}'
        logger.info(f'正在分析第{i}页')
        print(f'正在分析第{i}页')
        html = request.urlopen(target_url).read().decode(encoding)
        soup = BeautifulSoup(html, 'lxml')
        outer_div = soup.find('div', class_='forum_info_section member_wrap clearfix bawu-info')
        inner_spans = outer_div.find_all('span', class_='member')
        for index, span in enumerate(inner_spans):
            name_link = span.find('a', class_='user_name')
            name_img = span.find('img')
            print(name_img['src'])
            name = name_link.string
            url_img = name_img['src']
            try:
                # Make sure the destination folder exists.
                if not os.path.exists(file_path):
                    os.makedirs(file_path)
                # Avatar URLs carry no useful extension, so use .jpg.
                file_suffix = '.jpg'
                print(file_suffix)
                # strip() removes characters that are illegal in file names.
                # The original defined strip() but never called it, so a user
                # name containing e.g. '/' or '?' broke the file path.
                filename = '{}{}{}{}'.format(file_path, os.sep, strip(name), file_suffix)
                print(filename)
                # Download the image straight into the target file.
                request.urlretrieve(url_img, filename=filename)
            except IOError as e:
                # Keep the error details instead of printing a bare "IOError".
                print(f'IOError: {e}')
            except Exception as e:
                print(f'Exception: {e}')
            logger.info(f'已找到 {name}')
import datetime  # NOTE(review): unused in this script — candidate for removal
if __name__ == '__main__':
    # Determine the page count first, then crawl every page.
    _get_total_pages()
    _find_all_users()
import logging

# log_config: set up the root logger once, on import.
# Everything from DEBUG up goes to the console; only ERROR and above
# is appended to log.log.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Both handlers share one format string.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Terminal output.
consoleHandler = logging.StreamHandler()
consoleHandler.setLevel(logging.DEBUG)
consoleHandler.setFormatter(formatter)

# Persistent error log, opened in append mode.
fileHandler = logging.FileHandler('log.log', mode='a', encoding='UTF-8')
fileHandler.setLevel(logging.ERROR)
fileHandler.setFormatter(formatter)

for handler in (consoleHandler, fileHandler):
    logger.addHandler(handler)
希望可以帮助各位刚入门的朋友,我会定期把自己所学的发上来,希望大佬点评一下。