python学习的感受

如果你有其他编程基础的话学习Python肯定不会太难,我的本命是java,但是java我也是个渣,几乎只会增删改查,做过最难的也是增删该查,不过基本功能模块以及上级交代的任务,我还是可以完成的,最近因为工作需要,我开始学习python,贴一下自己的代码,我用的是python3.7

import urllib.request

url = "http://www.baidu.com/"
# 打开所需要的网址
response = urllib.request.urlopen(url)
# read() 读取内容   decode() 二进制转换字符串   默认utf-8
print(response.read())
# 获取请求的url
print(response.geturl())
# 获取头部信息  返回列表里面有的元素   dict() 转换成 json类型
print(dict(response.getheaders()))
# 获取状态码
print(response.getcode())
# 按行读取 返回列表,都是字节类型
print(response.readlines())
'''保存至文件里面  文件名:baidu.html  w  字符型打开类型
with open('baidu.html','w',encoding='utf8') as fp:
    fp.write(response.read().decode())
'''
# 二进制类型   wb 二进制打开方式 创建文件
with open('baidu1.html','wb') as fp:
     fp.write(response.read())


# 图片处理
image_url = "https://ss0.bdstatic.com/94oJfD_bAAcT8t7mm9GUKT-xh_/timg?image&quality=100&size=b4000_4000&sec=1562431650&di=be95dba31e377e497b6c5914ada7bd33&src=http://upload4.95171.cn/pic/AESH10001446/4655.jpg"
response = urllib.request.urlopen(image_url)
print(response)
# 保存图片只能写于本地二进制格式
with open('mienv.jpg','wb') as pf:
   pf.write(response.read())

# 第二种保存图片方式  格式  文件url   文件名.jpg
urllib.request.urlretrieve(image_url,'chi.jpg')

刚开始感觉python好简单啊,而且兴趣贼强,写着写着发现好难但是跟Java比起来代码确实要少很多,比如连接MySQL直接导入pymysql 导包 pip install pymysql命令, 写几行代码就ok, 但是越往后越感觉难, 应该实我的代码量太少了,http协议什么的还有html标签class都需要了解,  后期反爬更难,不多我肯定要学习的,java~python共存 不管多难一定要学习,我感觉我的学习方法还是不行,靠的是走量来完成的,写的多了才能记住,也是应为这点让我发现了我是个普通人啊,IQ没有太高,记忆力也不行,所以要更加努力,加油,学习多少记多少,兴趣很重要,完成一点东西自己就感觉好骄傲。IT行业路难走,得一直追寻着新的技术的学习,追寻,我也想学习,但是路很迷茫,无法规划自己的路怎么走,学习东西三天两头给拉下,自学两三天就不想学了,就像学别的,技术诱惑太多,我无法便知哪是我的路,我特别希望我的技术路上有一位可以带我走出迷雾的大佬,我是真的在迷雾里面,对我而言实际开发了才有冲劲,才有解决一切困难的status,学习之路很漫长,也很遥远,希望我的道路上能碰到一位能带我走出迷雾的老师,在学校里是学生,出来社会是职场人,我想我的职场之路有老师以及一起学习的同学,写点鸡汤安慰一下自己:哎,写不出来,算了。希望能碰见一起学习互相帮忙可以比拼的人。

 

贴一下最近写的python爬虫爬取百度贴吧:应该是可以直接用的 

"""
Python写的百度贴吧工具
"""
import pymysql

host = 'localhost'
db_name = 'test'
username = 'root'
password = 'Admin@123'


def _get_connection(host, username, password, db_name):
    return pymysql.connect(host=host,
                           user=username,
                           password=password,
                           charset='utf8mb4',
                           db=db_name)




def _insert_table(connection, username):
    insert_table_sql = """
    INSERT INTO tieba_bing 
    VALUES(%s)"""

    with connection.cursor() as cursor:
        cursor.execute(insert_table_sql, (username))
        connection.commit()


import urllib.request as request
from bs4 import BeautifulSoup
import re
import log_config
import logging

logger = logging.getLogger()

encoding = 'GBK'

base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%BB%AC%B1%F9'
# base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%B9%FD%C1%CB%BC%B4%CA%C7%BF%CD'
start_page = 1
total_pages = None

connection = _get_connection(host, username, password, db_name)


def _get_total_pages():
    html = request.urlopen(base_url).read().decode(encoding)
    soup = BeautifulSoup(html, 'lxml')
    page_span = soup.find('span', class_='tbui_total_page')
    p = re.compile(r'共(\d+)页')
    result = p.match(page_span.string)
    global total_pages
    total_pages = int(result.group(1))

    logger.info(f'会员共{total_pages}页')


def _find_all_users():
    global connection
    for i in range(start_page, total_pages + 1):
        target_url = f'{base_url}&pn={i}'
        logger.info(f'正在分析第{i}页')
        html = request.urlopen(target_url).read().decode(encoding)
        soup = BeautifulSoup(html, 'lxml')
        outer_div = soup.find('div', class_='forum_info_section member_wrap clearfix bawu-info')
        inner_spans = outer_div.find_all('span', class_='member')
        for index, span in enumerate(inner_spans):
            name_link = span.find('a', class_='user_name')
            name = name_link.string
            logger.info(f'已找到 {name}')

            try:
                _insert_table(connection, name)
            except Exception as e:
                logger.error(f'第{i}页{index}第个用户 {name} 发生异常')


import datetime

if __name__ == '__main__':
    _get_total_pages()
    _find_all_users()

import logging

# 创建Logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# 创建Handler

# 终端Handler
consoleHandler = logging.StreamHandler()
consoleHandler.setLevel(logging.DEBUG)

# 文件Handler
fileHandler = logging.FileHandler('log.log', mode='a', encoding='UTF-8')
fileHandler.setLevel(logging.ERROR)

# Formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
consoleHandler.setFormatter(formatter)
fileHandler.setFormatter(formatter)

# 添加到Logger中
logger.addHandler(consoleHandler)
logger.addHandler(fileHandler)

还有百度贴吧头像的:这个是放在文件夹里面,头像吧的

"""
Python写的百度贴吧爬取用户头像工具
"""
import pymysql

host = 'localhost'
db_name = 'test'
username = 'root'
password = 'Admin@123'


def _get_connection(host, username, password, db_name):
    return pymysql.connect(host=host,
                           user=username,
                           password=password,
                           charset='utf8mb4',
                           db=db_name)


# def _insert_table(connection, username):
#     insert_table_sql = """
#     INSERT INTO tieba_bing
#     VALUES(%s)"""

# with connection.cursor() as cursor:
#     cursor.execute(insert_table_sql, (username))
#     connection.commit()


import urllib.request as request
from bs4 import BeautifulSoup
import re
import log_config
import logging
import requests
import os

logger = logging.getLogger()

encoding = 'GBK'

base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%CD%B7%CF%F1'
# base_url = 'http://tieba.baidu.com/bawu2/platform/listMemberInfo?word=%B9%FD%C1%CB%BC%B4%CA%C7%BF%CD'
start_page = 1
total_pages = None

connection = _get_connection(host, username, password, db_name)

file_path='D:/book/img1'
def _get_total_pages():
    html = request.urlopen(base_url).read().decode(encoding)
    soup = BeautifulSoup(html, 'lxml')
    page_span = soup.find('span', class_='tbui_total_page')
    p = re.compile(r'共(\d+)页')
    result = p.match(page_span.string)
    global total_pages
    total_pages = int(result.group(1))

    logger.info(f'会员共{total_pages}页')


def strip(path):
    path = re.sub(r'[?\\*/"<>:/]', '', str(path))
    return path



def _find_all_users():
    global connection
    for i in range(start_page, total_pages + 1):
        target_url = f'{base_url}&pn={i}'
        logger.info(f'正在分析第{i}页')
        print(f'正在分析第{i}页')
        html = request.urlopen(target_url).read().decode(encoding)
        soup = BeautifulSoup(html, 'lxml')
        outer_div = soup.find('div', class_='forum_info_section member_wrap clearfix bawu-info')
        inner_spans = outer_div.find_all('span', class_='member')
        for index, span in enumerate(inner_spans):
            name_link = span.find('a', class_='user_name')
            name_img = span.find('img')
            print(name_img['src'])
            name = name_link.string
            url_img = name_img['src']

            try:
                # 是否有这个路径
                if not os.path.exists(file_path):
                    # 创建路径
                    os.makedirs(file_path)
                    # 获得图片后缀
                file_suffix = '.jpg'
                print(file_suffix)
                # 拼接图片名(包含路径)
                filename = '{}{}{}{}'.format(file_path, os.sep, name, file_suffix)
                print(filename)
                # 下载图片,并保存到文件夹中
                request.urlretrieve(url_img, filename=filename)

            except IOError as e:
                print("IOError")
            except Exception as e:
                print("Exception")

            # response = requests.session().get(url_img)
            # # 写入
            # img_data = response.content
            # with open('头像图片', 'wb') as fd:
            #   fd.write(img_data)
            #   print(url_img)
            logger.info(f'已找到 {name}')

        # print(name)

    # try:
    #     _insert_table(connection, name)
    # except Exception as e:
    #     logger.error(f'第{i}页{index}第个用户 {name} 发生异常')


import datetime

if __name__ == '__main__':
    _get_total_pages()
    _find_all_users()

import logging

# 创建Logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# 创建Handler

# 终端Handler
consoleHandler = logging.StreamHandler()
consoleHandler.setLevel(logging.DEBUG)

# 文件Handler
fileHandler = logging.FileHandler('log.log', mode='a', encoding='UTF-8')
fileHandler.setLevel(logging.ERROR)

# Formatter
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
consoleHandler.setFormatter(formatter)
fileHandler.setFormatter(formatter)

# 添加到Logger中
logger.addHandler(consoleHandler)
logger.addHandler(fileHandler)

希望可以帮助各位刚刚,我会定期把自己所学的发上来,希望大佬点评一下

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值