1. create a scrapy project
$ scrapy startproject appstore
2. define extracted data schema
edit appstore/appstore/items.py, add the following:
import scrapy
class AppstoreItem(scrapy.Item):
    """Schema for one app record extracted from an app store page.

    Each attribute below is declared as a ``scrapy.Field`` so that a spider
    can populate it on the item.
    """

    # NOTE(review): the field meanings below are inferred from the names;
    # confirm them against the spider that fills the item.
    title = scrapy.Field()  # presumably the app's display name
    url = scrapy.Field()  # presumably the app's detail-page URL
    appid = scrapy.Field()  # presumably the store's identifier for the app
    intro = scrapy.Field()  # presumably the app's introduction text
3. Create appstore/appstore/spiders/huawei_spider.py (example here: extracting data from the Huawei app store)
import scrapy
import re
from scrapy.selector import Selector
from appstore.items import AppstoreItem
class HuaweiSpider(scrapy.Spider):
name = "huawei"
allowed_domains = ["huawei.com"]
start_urls = ["http://appstore.huawei.com/more/all"]
def parse(self, response):
page =