Python2 和 Python3 爬取我主良缘交友网站上符合指定条件的妹子图片

最新推荐文章于 2024-05-23 23:03:54 发布

小仙女的小跟班_fairy

最新推荐文章于 2024-05-23 23:03:54 发布

阅读量509

点赞数

本文链接：https://blog.csdn.net/qq_32670879/article/details/81902264

版权

python2.7环境下的python代码详情

#encoding=utf-8
# Created by double lin at 2018/8/19
#获取源代码
#解析页面，查看是否有需要的信息
#下载图片
#下载个人信息

#在练习中要学会进行，代码优化
#可读性强
#耦合度低，，面向对象
#独立模块，不要一开始到结尾

import json
import os
import requests

# 定义需要检索的年龄值
def query_age():
    """Prompt for an age and return the matching search bracket.

    Returns:
        A ``(startage, endage)`` tuple: (21, 30), (31, 40), (41, 50),
        (51, 60) or (61, 99).

    Raises:
        ValueError: if the typed text is not an integer.
    """
    age = int(raw_input('请输入您想要匹配的年龄信息（如：20）:'))
    # Ages below 21 (including the prompt's own example, 20) used to fall
    # through to the 61-99 bracket; they now map to the nearest bracket.
    if age <= 30:
        return 21, 30
    if age <= 40:
        return 31, 40
    if age <= 50:
        return 41, 50
    if age <= 60:
        return 51, 60
    return 61, 99

# 定义需要进行检索的性别值
def query_sex():
    """Prompt for a gender and return its numeric code.

    Returns:
        1 when the answer is exactly '男', otherwise 2.
    """
    answer = raw_input('请输入您想要匹配的性别信息（如：女）:')
    return 1 if answer == '男' else 2

# 定义城市信息--写死的信息格式

# 定义身高信息
def query_height():
    """Prompt for a height and return the matching search bracket.

    Returns:
        A ``(startheight, endheight)`` tuple: (0, 150), (151, 160),
        (161, 170), (171, 180), (181, 190) or (191, 250).

    Raises:
        ValueError: if the typed text is not an integer.
    """
    height = int(raw_input('请输入您想要匹配的身高信息（如：161）：'))
    # "<= 150" rather than "< 150": a height of exactly 150 used to match no
    # bracket and fell through to 191-250.
    if height <= 150:
        return 0, 150
    if height <= 160:
        return 151, 160
    if height <= 170:
        return 161, 170
    if height <= 180:
        return 171, 180
    if height <= 190:
        return 181, 190
    return 191, 250

# 月薪
def query_salary():
    """Prompt for a monthly salary and return the search code for it.

    Returns:
        0 below 2000, 2 up to 5000, 3 up to 10000, 4 up to 20000,
        and 5 above 20000.

    Raises:
        ValueError: if the typed text is not an integer.
    """
    amount = int(raw_input('请输入您想要匹配的薪资水平（如：2000）：'))

    if amount < 2000:
        return 0
    # Each code covers everything up to and including its ceiling.
    for code, ceiling in ((2, 5000), (3, 10000), (4, 20000)):
        if amount <= ceiling:
            return code
    return 5

# 获取一个网页中所有的妹子图片
def get_one_page(url):
    """Fetch one page of search results and save every profile listed on it.

    The response is parsed as JSON and the entries under ``data`` -> ``list``
    are passed one by one to ``download_img`` and ``download_info``.

    Args:
        url: Full URL of the search API page to fetch.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    r = requests.get(url, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    # "or []" tolerates a null list (e.g. a page past the last result).
    data = r.json()['data']['list'] or []
    for item in data:
        download_img(item)
        download_info(item)


# 判断是否存在文件夹，如果不存在，创建文件夹，然后下载图片到文件夹中
def download_img(item):
    if not os.path.exists('images'):
        os.mkdir('images')
    username = item['username']
    username = username.replace('*','')
    username = username.replace('\\','')
    username = username.replace('/', '')
    imgUrl = item['avatar']

    file_path = u'images/{}.png'.format(username)
    if not os.path.exists(file_path):
        # print '开始{}的信息下载'.format(username)

        with open(file_path, 'wb') as f:
            r = requests.get(imgUrl)
            f.write(r.content)
            f.close()
        print '{}的信息下载完毕'.format(username)
    else:
        print '您要下载的图片已存在，继续下一张图片下载'

# 保存客户的个人信息
def download_info(item):
    """Save a profile's details as UTF-8 text in ``info/<username>.txt``.

    The ``info`` directory is created when missing. Characters ``*``, ``\\``
    and ``/`` are stripped from the username before it is used as a file
    name. Nothing is written when the target file already exists. The
    caller's dict is left unmodified.

    Args:
        item: Profile dict; reads ``userid``, ``username``, ``province``,
            ``city``, ``monolog``, ``education`` and ``birthdayyear``.

    Raises:
        KeyError: if one of the keys above is missing.
    """
    if not os.path.exists('info'):
        os.mkdir('info')

    # Work on a local copy so the caller's dict is not altered.
    username = item['username']
    for forbidden in ('*', '\\', '/'):
        username = username.replace(forbidden, '')
    file_path = u'info/{}.txt'.format(username)
    if os.path.exists(file_path):
        return

    fields = (
        (u'ID:', item['userid']),
        (u'用户名:', username),
        (u'省份:', item['province']),
        (u'城市:', item['city']),
        (u'内心独白:', item['monolog']),
        (u'学历:', item['education']),
        (u'生日年份:', item['birthdayyear']),
    )
    # Unicode templates avoid the implicit ASCII decode that byte-literal +
    # unicode concatenation triggers in Python 2, and format() also accepts
    # non-string values such as an integer userid.
    text = u''.join(u'{}{}\n'.format(label, value) for label, value in fields)
    # Build the text first so a bad field cannot leave a half-written file.
    with open(file_path, 'wb') as f:
        f.write(text.encode('utf-8'))

if __name__ == '__main__':
    # Collect the search criteria; the two range helpers return (low, high)
    # pairs that are unpacked into separate names.
    startage, endage = query_age()
    sex = query_sex()
    startheight, endheight = query_height()
    salary = query_salary()

    # Search endpoint with the city id and marital-status filter fixed.
    url_template = (
        'http://www.lovewzly.com/api/user/pc/list/search'
        '?startage={}&endage={}&gender={}&cityid=180'
        '&startheight={}&endheight={}&salary={}&marry=1&page={}'
    )
    # Crawl result pages 1 through 10.
    for page in range(1, 11):
        url = url_template.format(
            startage, endage, sex, startheight, endheight, salary, page)
        get_one_page(url)

Python3.7环境下的爬虫代码信息

#encoding=utf-8
# Created by double lin at 2018/8/19
#获取源代码
#解析页面，查看是否有需要的信息
#下载图片
#下载个人信息

#在练习中要学会进行，代码优化
#可读性强
#耦合度低，，面向对象
#独立模块，不要一开始到结尾

import json
import os
import requests
import re

# 定义需要检索的年龄值
def query_age():
    """Prompt for an age and return the matching search bracket.

    Returns:
        A ``(startage, endage)`` tuple: (21, 30), (31, 40), (41, 50),
        (51, 60) or (61, 99).

    Raises:
        ValueError: if the typed text is not an integer.
    """
    age = int(input('请输入您想要匹配的年龄信息（如：20）:'))
    # Ages below 21 (including the prompt's own example, 20) used to fall
    # through to the 61-99 bracket; the first bracket now has no lower bound.
    brackets = ((30, (21, 30)), (40, (31, 40)), (50, (41, 50)), (60, (51, 60)))
    for upper, bracket in brackets:
        if age <= upper:
            return bracket
    return 61, 99

# 定义需要进行检索的性别值
def query_sex():
    """Prompt for a gender and return its numeric code.

    Returns:
        1 when the answer is exactly '男', otherwise 2.
    """
    answer = input('请输入您想要匹配的性别信息（如：女）:')
    return 1 if answer == '男' else 2

# 定义城市信息--写死的信息格式

# 定义身高信息
def query_height():
    """Prompt for a height and return the matching search bracket.

    Returns:
        A ``(startheight, endheight)`` tuple: (0, 150), (151, 160),
        (161, 170), (171, 180), (181, 190) or (191, 250).

    Raises:
        ValueError: if the typed text is not an integer.
    """
    height = int(input('请输入您想要匹配的身高信息（如：161）：'))
    # Brackets are keyed by their inclusive upper bound. A height of exactly
    # 150 used to match no bracket and fell through to 191-250.
    brackets = (
        (150, (0, 150)),
        (160, (151, 160)),
        (170, (161, 170)),
        (180, (171, 180)),
        (190, (181, 190)),
    )
    for upper, bracket in brackets:
        if height <= upper:
            return bracket
    return 191, 250

# 月薪
def query_salary():
    """Prompt for a monthly salary and return the search code for it.

    Returns:
        0 below 2000, 2 up to 5000, 3 up to 10000, 4 up to 20000,
        and 5 above 20000.

    Raises:
        ValueError: if the typed text is not an integer.
    """
    amount = int(input('请输入您想要匹配的薪资水平（如：2000）：'))

    if amount < 2000:
        return 0
    # Each code covers everything up to and including its ceiling.
    for code, ceiling in ((2, 5000), (3, 10000), (4, 20000)):
        if amount <= ceiling:
            return code
    return 5

# 获取一个网页中所有的妹子图片
def get_one_page(url):
    """Fetch one page of search results and save every profile listed on it.

    The response is parsed as JSON and the entries under ``data`` -> ``list``
    are printed, then passed to ``download_img`` and ``download_info``.

    Args:
        url: Full URL of the search API page to fetch.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    r = requests.get(url, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    # "or []" tolerates a null list (e.g. a page past the last result).
    data = r.json()['data']['list'] or []

    for item in data:
        print (item)
        download_img(item)
        download_info(item)


# 判断是否存在文件夹，如果不存在，创建文件夹，然后下载图片到文件夹中
def download_img(item):
    """Download a profile's avatar into ``images/<username>.png``.

    The ``images`` directory is created when missing. Characters ``*``,
    ``\\`` and ``/`` are stripped from the username before it is used as a
    file name. Nothing is downloaded when the target file already exists.

    Args:
        item: Profile dict; reads the ``username`` and ``avatar`` keys.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status. No file is left behind in that case.
    """
    if not os.path.exists('images'):
        os.mkdir('images')
    username = item['username']
    for forbidden in ('*', '\\', '/'):
        username = username.replace(forbidden, '')
    imgUrl = item['avatar']

    file_path = u'images/{}.png'.format(username)
    if os.path.exists(file_path):
        print ('您要下载的图片已存在，继续下一张图片下载')
        return

    # Download BEFORE opening the file: opening first left an empty file on
    # failure, and the existence check above then skipped it forever.
    r = requests.get(imgUrl, timeout=10)
    r.raise_for_status()
    with open(file_path, 'wb') as f:
        f.write(r.content)
    print ('{}的信息下载完毕'.format(username))
# 保存客户的个人信息
def download_info(item):
    """Save a profile's details as UTF-8 text in ``info/<username>.txt``.

    The ``info`` directory is created when missing. Characters ``*``, ``\\``
    and ``/`` are stripped from the username before it is used as a file
    name. Nothing is written when the target file already exists. The
    caller's dict is left unmodified.

    Args:
        item: Profile dict; reads ``username``, ``province``, ``city``,
            ``education``, ``monolog`` and ``birthdayyear``.

    Raises:
        KeyError: if one of the keys above is missing.
    """
    if not os.path.exists('info'):
        os.mkdir('info')

    # Work on a local copy so the caller's dict is not altered.
    username = item['username']
    for forbidden in ('*', '\\', '/'):
        username = username.replace(forbidden, '')
    file_path = u'info/{}.txt'.format(username)
    if os.path.exists(file_path):
        return

    fields = (
        ('用户名：', username),
        ('省份：', item['province']),
        ('城市：', item['city']),
        ('学历：', item['education']),
        ('内心独白：', item['monolog']),
        ('生日：', item['birthdayyear']),
    )
    # format() accepts non-string values (e.g. an integer year), where
    # str + int concatenation would raise TypeError.
    text = ''.join('{}{}\n'.format(label, value) for label, value in fields)
    # The file is opened in binary mode, so the text is encoded once here.
    # Building it first means a bad field cannot leave a half-written file.
    with open(file_path, 'wb') as f:
        f.write(text.encode('UTF-8'))

# 定义函数，删除一个文本中出现的包含表情的特殊字符
# Emoji ranges written as real code points. Python 3 strings hold an emoji as
# a single code point, so a pattern spelled as UTF-16 surrogate pairs never
# matches it.
_EMOJI_PATTERN = re.compile(
    u"["
    u"\U0001F600-\U0001F64F"  # emoticons
    u"\U0001F300-\U0001F5FF"  # symbols & pictographs
    u"\U0001F680-\U0001F6FF"  # transport & map symbols
    u"\U0001F1E0-\U0001F1FF"  # flags (regional indicator symbols)
    u"]+", flags=re.UNICODE)


def remove_emoji(text):
    """Return ``text`` with emoji characters removed.

    The pattern lives at module level, so the function also works when this
    file is imported rather than run as a script.

    Args:
        text: String to clean.

    Returns:
        A copy of ``text`` without characters in the emoji ranges above.
    """
    return _EMOJI_PATTERN.sub(r'', text)

if __name__ == '__main__':
    # Collect the search criteria; the two range helpers return (low, high)
    # pairs that are unpacked into separate names.
    startage, endage = query_age()
    sex = query_sex()
    startheight, endheight = query_height()
    salary = query_salary()

    # Not used by the crawl below; kept for filtering special characters in
    # usernames.
    emoji_pattern = re.compile(
        u"(\ud83d[\ude00-\ude4f])|"  # emoticons
        u"(\ud83c[\udf00-\uffff])|"  # symbols & pictographs (1 of 2)
        u"(\ud83d[\u0000-\uddff])|"  # symbols & pictographs (2 of 2)
        u"(\ud83d[\ude80-\udeff])|"  # transport & map symbols
        u"(\ud83c[\udde0-\uddff])"  # flags (iOS)
        "+", flags=re.UNICODE)

    # Search endpoint with the city id and marital-status filter fixed.
    url_template = (
        'http://www.lovewzly.com/api/user/pc/list/search'
        '?startage={}&endage={}&gender={}&cityid=180'
        '&startheight={}&endheight={}&salary={}&marry=1&page={}'
    )
    # Crawl result pages 1 through 10.
    for page in range(1, 11):
        url = url_template.format(
            startage, endage, sex, startheight, endheight, salary, page)
        get_one_page(url)

困扰点：在 Python 3 中把字符串写入 txt 文件时，写法与 Python 2 稍有不同——文件以二进制模式（'wb'）打开，Python 3 只接受 bytes，因此需要先把字符串按指定编码转换成 bytes 再写入：

python2 ：

        # Excerpt from the Python 2 download_info: the file is opened in
        # binary mode and each field is written as one "label:value" line.
        with open(file_path, 'wb') as f:
            f.write('ID:'+item['userid']+'\n')
            f.write('用户名:'+item['username']+'\n')
            f.write('省份:'+item['province']+'\n')
            f.write('城市:'+item['city']+'\n')
            f.write('内心独白:'+item['monolog']+'\n')
            f.write('学历:'+item['education']+'\n')
            f.write('生日年份:'+item['birthdayyear']+'\n')
            # Redundant: the with-block already closes f on exit.
            f.close()

python3：

        # Excerpt from the Python 3 download_info: the file is opened in
        # binary mode, so every line is converted to bytes before writing.
        with open(file_path, 'wb') as f:
            # f.write('ID:%d' %int(item['userid'])+'\n')

            # In Python 3 a file opened with 'wb' only accepts bytes, so each
            # string is converted with bytes(your_string, 'utf-8') (or another
            # encoding name) before it is written.
            f.write(bytes('用户名：'+item['username'] +'\n', 'UTF-8'))
            f.write(bytes('省份：' + item['province'] +'\n', 'UTF-8'))
            f.write(bytes('城市：' + item['city'] +'\n', 'UTF-8'))
            f.write(bytes('学历：' + item['education'] +'\n', 'UTF-8'))
            f.write(bytes('内心独白：' + item['monolog'] +'\n', 'UTF-8'))
            f.write(bytes('生日：' + item['birthdayyear'] +'\n', 'UTF-8'))

            # Redundant: the with-block already closes f on exit.
            f.close()

谢谢大家！！