初学 Python,自己找个练手任务:爬取豆瓣电影 Top250,保存为 DataFrame 数据格式,留待分析。(代码粗糙,留存)
具体要配合豆瓣电影的HTML看代码
# URL template for the Douban Top 250 listing; the %d placeholder is filled
# into the `start` query parameter with the %-operator (e.g. url % 25).
url = 'https://movie.douban.com/top250?start=%d&filter='
from bs4 import BeautifulSoup
from urllib.request import urlopen
import pandas as pd
import numpy as np
from pandas import DataFrame,Series
import re
def split(str, regular):
    """Split a string on every match of a regular expression.

    Args:
        str: The text to split. (The name shadows the builtin ``str`` inside
            this function; it is kept so existing keyword callers still work.)
        regular: The regular-expression pattern used as the separator.

    Returns:
        The list of substrings produced by ``re.split(regular, str)``.
    """
    return re.split(regular, str)
def trans_list(main_list, sub_list):
    """Flatten one nested list inside ``main_list`` in place.

    The first element of ``main_list`` that compares equal to ``sub_list`` is
    replaced by the elements of ``sub_list``, kept in their original order.

    Args:
        main_list: The list holding the nested element; modified in place.
        sub_list: The nested list whose elements are spliced in. It is not
            modified, and it may be either the very object stored in
            ``main_list`` or a separate list that compares equal to it.

    Returns:
        The same ``main_list`` object, after the splice.

    Raises:
        ValueError: If no element of ``main_list`` equals ``sub_list``.
    """
    index = main_list.index(sub_list)
    # Slice assignment swaps the single nested element for its contents in one
    # step, so there is no need to reverse ``sub_list`` (which would mutate the
    # caller's list) or to search for it a second time afterwards.
    main_list[index:index + 1] = list(sub_list)
    return main_list