Operating MySQL from cmd
- cd in cmd to MySQL's bin directory, then log in with `mysql -u root -p`
- Show the databases: `show databases;`
- Switch into a database: `use <db_name>;`
- Show its tables: `show tables;`
- Create a table: `create table <name> (<columns>);` (the table used in the case study is sketched right after this list)
- Drop a table: `drop table <name>;`
- Delete a table's data: `delete from <name>;` (or `truncate table <name>;`)
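For the case study below, the same table can also be created from Python with pymysql. A minimal sketch, assuming a local server with user `root` / password `root` and the `mybatis_plus` database used later in pipelines.py; the column names follow the item fields, but the types here are assumptions, not the author's original DDL:

```python
import pymysql

# Assumed connection settings -- they mirror pipelines.py below.
db = pymysql.connect(host='localhost', user='root', password='root',
                     db='mybatis_plus', charset='utf8')
cursor = db.cursor()
# Hypothetical schema: one column per field defined in items.py.
cursor.execute('''
    create table if not exists dd (
        title       varchar(255),
        author      varchar(255),
        intro       text,
        price       varchar(50),
        press       varchar(255),
        press_time  varchar(50),
        comment_num varchar(50)
    ) default charset=utf8
''')
cursor.close()
db.close()
```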
Common string operations
- strip()
  - by default removes whitespace from both ends of the string
  - strip("¥") removes the specified characters from both ends; note the argument is treated as a set of characters, not as a substring
  - to trim only one end, use lstrip() or rstrip() (all three are demonstrated below)
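A quick demonstration of all three methods:

```python
price = '  ¥25.00  '
print(price.strip())             # '¥25.00' -- whitespace trimmed from both ends
print(price.strip().strip('¥'))  # '25.00'  -- then the currency sign
print('xxyyxx'.strip('x'))       # 'yy' -- argument is a character set, not a substring
print('  hi '.lstrip())          # 'hi '  -- left end only
print('  hi '.rstrip())          # '  hi' -- right end only
```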
Case study
===========items.py===========
import scrapy

# One Field per book attribute scraped from the listing page.
class Mysql01Item(scrapy.Item):
    title = scrapy.Field()
    author = scrapy.Field()
    intro = scrapy.Field()
    price = scrapy.Field()
    press = scrapy.Field()
    press_time = scrapy.Field()
    comment_num = scrapy.Field()
=========settings.py==========
LOG_LEVEL = 'WARNING'
USER_AGENT = 'Mozilla/5.XXXXXXX'
ROBOTSTXT_OBEY = False
ITEM_PIPELINES = {
    'mysql01.pipelines.Mysql01Pipeline': 300,  # enable the MySQL pipeline (lower number = earlier)
}
==========dd.py===========
import scrapy
from mysql01.items import Mysql01Item

class DdSpider(scrapy.Spider):
    name = 'dd'
    allowed_domains = ['dangdang.com']
    start_urls = ['http://search.dangXXXX.com/?key=xxxx']

    '''
    An alternative way to paginate: generate every page URL up front
    instead of following the "next" link.
    pageNum = 199
    for page in range(2, pageNum):
        next_url = 'http://search.dangdang.com/?key=%D0%C4%C0%ED%D1%A7&act=input&page_index={}'.format(page)
        # next_url = f'http://search.dangdang.com/?key=%D0%C4%C0%ED%D1%A7&act=input&page_index={page}'
        yield scrapy.Request(next_url, callback=..., meta={'item': item})
    '''
    def parse(self, response):
        # Each <li> under the result list is one book.
        li_list = response.xpath("//ul[@class='bigimg']/li")
        print('*' * 100)
        for li in li_list:
            item = Mysql01Item()
            item['title'] = li.xpath("./a/@title").extract_first()
            '''
            li.xpath(...) returns a SelectorList (a list of Selector objects);
            .extract_first() returns the first matched string in that list,
            or None if nothing matched.
            '''
            # Some books list no author, so fall back to a placeholder;
            # extract_first(default='no author info') would do the same.
            item['author'] = li.xpath("./p[@class='search_book_author']/span[1]/a[1]/@title").extract_first() if len(li.xpath("./p[@class='search_book_author']/span[1]/a[1]/@title")) > 0 else 'no author info'
            item['intro'] = li.xpath("./p[@class='detail']/text()").extract_first()
            item['price'] = li.xpath("./p[@class='price']/span[@class='search_now_price']/text()").extract_first()
            item['press'] = li.xpath("./p[@class='search_book_author']/span[3]/a[1]/@title").extract_first()
            item['press_time'] = li.xpath("./p[@class='search_book_author']/span[2]/text()").extract_first()
            item['comment_num'] = li.xpath(".//a[@class='search_comment_num']/text()").extract_first()
            yield item
        # Follow the "next page" link; the href is relative, so prepend the host.
        next_url = response.xpath("//ul[@name='Fy']/li[@class='next']/a/@href").extract_first()
        if next_url is not None:
            next_url = 'http://search.dangdang.com' + next_url
            yield scrapy.Request(
                next_url,
                callback=self.parse
            )
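A side note on the next-page handling above: Scrapy can resolve relative hrefs itself, so the manual host-prepending is avoidable. A minimal sketch of the same logic with `response.follow` (a standard Scrapy API; everything else in `parse` stays unchanged):

```python
# response.follow resolves the relative href against the current page URL
# and builds the Request, so no string concatenation is needed.
next_url = response.xpath("//ul[@name='Fy']/li[@class='next']/a/@href").extract_first()
if next_url is not None:
    yield response.follow(next_url, callback=self.parse)
```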
===========pipelines.py===========
import pymysql

class Mysql01Pipeline(object):
    def process_item(self, item, spider):
        # Note: this opens a fresh connection for every item, which is simple
        # but slow; a one-connection-per-crawl variant is sketched below.
        db = pymysql.connect(host='localhost', user='root', password='root', db='mybatis_plus', charset='utf8')
        cursor = db.cursor()
        title = item['title'].strip()
        author = item['author']
        intro = item['intro']
        # strip('¥') drops the leading currency sign; '元' (yuan) is appended.
        price = item['price'].strip('¥') + '元'
        press = item['press']
        press_time = item['press_time'].strip().strip('/')
        # Drops the trailing '条评论' ("comments") suffix, keeping only the count.
        comment_num = item['comment_num'].strip('条评论')
        cursor.execute(
            'insert into dd values(%s,%s,%s,%s,%s,%s,%s)',
            (title, author, intro, price, press, press_time, comment_num)
        )
        db.commit()
        cursor.close()
        db.close()
        print('=' * 100)
        return item
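Because `process_item` runs once per scraped item, reconnecting to MySQL each time is the slowest part of this pipeline. A minimal sketch of a variant that holds one connection for the whole crawl via the standard `open_spider`/`close_spider` hooks (the class name here is made up; point `ITEM_PIPELINES` at whichever class you actually use):

```python
import pymysql

class Mysql01PipelineReusable(object):
    def open_spider(self, spider):
        # Opened once when the crawl starts.
        self.db = pymysql.connect(host='localhost', user='root', password='root',
                                  db='mybatis_plus', charset='utf8')
        self.cursor = self.db.cursor()

    def close_spider(self, spider):
        # Closed once when the crawl ends.
        self.cursor.close()
        self.db.close()

    def process_item(self, item, spider):
        self.cursor.execute(
            'insert into dd values(%s,%s,%s,%s,%s,%s,%s)',
            (item['title'].strip(), item['author'], item['intro'],
             item['price'].strip('¥') + '元', item['press'],
             item['press_time'].strip().strip('/'),
             item['comment_num'].strip('条评论'))
        )
        self.db.commit()
        return item
```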