现在需要采集凡客诚品里面的东西。目前产品、市场价、内容、介绍等这些都能采集到，但不知道为什么，这段时间采集规则变了。有了解采集的朋友请尽快联系我，有报酬。我对采集不是很懂。
下面是之前的采集规则：
<?xml version="1.0" encoding="UTF-8"?>
<Task>
<!-- State: holds only a line break in this file. -->
<State>
</State>
<!--
  BaseInfo: task-level settings. Name is the task title, DemoUrl is a sample
  product-page URL, ThreadCount is 3 and UrlCount is 1.
  Type (1051), RunType (1041) and WebCode (1000) are numeric codes defined by
  the consuming tool; their meaning is not visible in this file.
  NOTE(review): Version 3.21 is presumably the tool or rule-format version;
  confirm before loading this file into a different release.
  Login, cookie and URL-encoding options are switched off or left blank.
-->
<BaseInfo>
<Version>3.21</Version>
<ID>0</ID>
<Name>凡客诚品</Name>
<TaskDemo>
</TaskDemo>
<Class>示例</Class>
<Type>1051</Type>
<RunType>1041</RunType>
<SavePath>data</SavePath>
<ThreadCount>3</ThreadCount>
<UrlCount>1</UrlCount>
<StartPos>
</StartPos>
<EndPos>
</EndPos>
<DemoUrl>http://item.vancl.com/0170605.html</DemoUrl>
<Cookie>
</Cookie>
<WebCode>1000</WebCode>
<IsLogin>False</IsLogin>
<LoginUrl>
</LoginUrl>
<IsUrlEncode>False</IsUrlEncode>
<UrlEncode>
</UrlEncode>
</BaseInfo>
<!--
  Result: export settings. Every destination field (file name, data source,
  table name, insert SQL, export URL, export URL code, export cookie) holds
  only a line break in this file.
  ExportType 1060 is a tool-defined code; its meaning is not visible here.
-->
<Result>
<ExportType>1060</ExportType>
<ExportFileName>
</ExportFileName>
<DataSource>
</DataSource>
<DataTableName>
</DataTableName>
<InsertSql>
</InsertSql>
<ExportUrl>
</ExportUrl>
<ExportUrlCode>
</ExportUrlCode>
<ExportCookie>
</ExportCookie>
</Result>
<!--
  Advance: advanced gather options, written as True/False switches plus a few
  numeric values. Enabled here: IsIgnore404, IsExportHeader, IsDelRepeatRow;
  every other switch is False.
  NOTE(review): GatherAgainNumber (3) is presumably a retry count and
  GatherIntervalTime (0) a delay between requests; confirm against the tool.
  TriggerType 3020 is a tool-defined code whose meaning is not visible here.
  TempFileName contains only the extension ".xml".
-->
<Advance>
<GatherAgainNumber>3</GatherAgainNumber>
<IsIgnore404>True</IsIgnore404>
<IsErrorLog>False</IsErrorLog>
<IsExportHeader>True</IsExportHeader>
<IsDelRepeatRow>True</IsDelRepeatRow>
<IsDelTempData>False</IsDelTempData>
<IsSaveSingleFile>False</IsSaveSingleFile>
<TempFileName>.xml</TempFileName>
<IsDataProcess>False</IsDataProcess>
<IsExportGUrl>False</IsExportGUrl>
<IsExportGDateTime>False</IsExportGDateTime>
<IsTrigger>False</IsTrigger>
<TriggerType>3020</TriggerType>
<GatherIntervalTime>0</GatherIntervalTime>
<IsCustomHeader>False</IsCustomHeader>
<IsPublishHeader>False</IsPublishHeader>
<IsProxy>False</IsProxy>
<IsProxyFirst>False</IsProxyFirst>
<IsUrlNoneRepeat>False</IsUrlNoneRepeat>
</Advance>
<!-- HttpHeaders and Trigger hold only a line break in this file. -->
<HttpHeaders>
</HttpHeaders>
<Trigger>
</Trigger>
<!--
  WebLinks: the entry URL to gather from plus its navigation rules.
  The single WebLink points at a vancl.com search/listing page; its one
  NavigationRule (Level 1) repeats the same URL and carries a regular
  expression in NagRule/Common (presumably used to pull product-page links
  out of the listing HTML; confirm against the tool).

  The "&" separators in the query strings and the "<" / ">" characters inside
  the regular expression are entity-escaped so that this file is well-formed
  XML; a parser decodes them back to the literal characters.

  NOTE(review): Common is kept as a child element because its tags are
  balanced in the source. If the consuming tool expects the marker as literal
  text inside NagRule, its angle brackets must be escaped as well.
-->
<WebLinks>
<WebLink>
<Url>http://shirts.vancl.com/search?navtype=1&amp;cateid=1175&amp;view=1</Url>
<IsNag>True</IsNag>
<IsMultiPageGather>False</IsMultiPageGather>
<IsNextPage>False</IsNextPage>
<NextPageRule>
</NextPageRule>
<NextMaxPage>0</NextMaxPage>
<IsDoPostBack>False</IsDoPostBack>
<NextPageUrl>
</NextPageUrl>
<IsGathered>2031</IsGathered>
<NavigationRules>
<NavigationRule>
<Url>http://shirts.vancl.com/search?navtype=1&amp;cateid=1175&amp;view=1</Url>
<Level>1</Level>
<IsNext>False</IsNext>
<NextRule>
</NextRule>
<NextMaxPage>0</NextMaxPage>
<IsNextDoPostBack>False</IsNextDoPostBack>
<NaviStartPos>
</NaviStartPos>
<NaviEndPos>
</NaviEndPos>
<NagRule><Common>(?&lt;=&lt;li\ style="position:relative;"\ class="scListArea"&gt;&lt;strong&gt;&lt;a\ href=")[^&gt;]+?(?=")</Common></NagRule>
<IsNextPage>False</IsNextPage>
<NextPageRule>
</NextPageRule>
<NaviNextMaxPage>0</NaviNextMaxPage>
<IsNaviNextDoPostBack>False</IsNaviNextDoPostBack>
<IsGather>False</IsGather>
<GatherStartPos>
</GatherStartPos>
<GatherEndPos>
</GatherEndPos>
</NavigationRule>
</NavigationRules>
</WebLink>
</WebLinks>
<GatherRules>
<!--
  Field rule "品名". StartFlag and EndFlag are HTML fragments (presumably the
  value is the page text found between them; confirm against the tool).
  Their angle brackets are entity-escaped so this file is well-formed XML;
  a parser decodes them back to the literal strings "<h2>" and "</h2><div>".
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>品名</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>&lt;h2&gt;</StartFlag>
<EndFlag>&lt;/h2&gt;&lt;div&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "商品编号". StartFlag is the literal label text and EndFlag is a
  closing span tag (presumably the value is the page text found between them;
  confirm against the tool). The angle brackets in EndFlag are entity-escaped
  so this file is well-formed XML; a parser decodes them back to "</span>".
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>商品编号</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>商品编号:</StartFlag>
<EndFlag>&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "特惠价". StartFlag and EndFlag are HTML fragments around the
  price (presumably the value is the page text found between them; confirm
  against the tool). Their angle brackets are entity-escaped so this file is
  well-formed XML; a parser decodes them back to the literal markup.
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>特惠价</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>特惠价:&lt;span&gt;¥&lt;strong&gt;</StartFlag>
<EndFlag>&lt;/strong&gt;&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "市场价". StartFlag and EndFlag are HTML fragments around the
  price (presumably the value is the page text found between them; confirm
  against the tool). Their angle brackets are entity-escaped so this file is
  well-formed XML; a parser decodes them back to the literal markup.
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>市场价</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>市场价:¥&lt;strong&gt;</StartFlag>
<EndFlag>&lt;/strong&gt;&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "售价". StartFlag is the literal label text and EndFlag is a
  closing span tag (presumably the value is the page text found between them;
  confirm against the tool). The angle brackets in EndFlag are entity-escaped
  so this file is well-formed XML; a parser decodes them back to "</span>".
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>售价</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>售价:¥</StartFlag>
<EndFlag>&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "VIP价". StartFlag is the literal label text and EndFlag is a
  closing span tag (presumably the value is the page text found between them;
  confirm against the tool). The angle brackets in EndFlag are entity-escaped
  so this file is well-formed XML; a parser decodes them back to "</span>".
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>VIP价</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>VIP价:¥</StartFlag>
<EndFlag>&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "SVIP价". StartFlag is the literal label text and EndFlag is a
  closing span tag (presumably the value is the page text found between them;
  confirm against the tool). The angle brackets in EndFlag are entity-escaped
  so this file is well-formed XML; a parser decodes them back to "</span>".
  RuleByPage, DataType, GatherRuleType and LimitSign are tool-defined codes.
-->
<GatherRule>
<Title>SVIP价</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>SVIP价:¥</StartFlag>
<EndFlag>&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
</ExportRules>
</GatherRule>
<!--
  Field rule "产品描述". StartFlag and EndFlag are HTML fragments (presumably
  the value is the page text found between them; confirm against the tool).
  Their angle brackets are entity-escaped so this file is well-formed XML;
  a parser decodes them back to the literal markup.
  NOTE(review): EndFlag begins with "span" and has no leading "<". The value
  is preserved exactly as found; confirm whether the omission is intentional.
  This rule also carries one ExportRule for the same field with
  ExortRuleType 2041 (a tool-defined code) and a blank condition. The
  "Exort" spelling is part of the element names and is left untouched.
-->
<GatherRule>
<Title>产品描述</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>产品描述:&lt;/h3&gt;</StartFlag>
<EndFlag>span class="blank20"&gt;&lt;/span&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
<ExportRule>
<ExortField>产品描述</ExortField>
<ExortRuleType>2041</ExortRuleType>
<ExortRuleCondition>
</ExortRuleCondition>
</ExportRule>
</ExportRules>
</GatherRule>
<!--
  Field rule "产品属性". StartFlag and EndFlag are HTML fragments (presumably
  the value is the page text found between them; confirm against the tool).
  Their angle brackets are entity-escaped so this file is well-formed XML;
  a parser decodes them back to the literal markup.
  This rule also carries one ExportRule for the same field with
  ExortRuleType 2041 (a tool-defined code) and a blank condition. The
  "Exort" spelling is part of the element names and is left untouched.
-->
<GatherRule>
<Title>产品属性</Title>
<RuleByPage>5011</RuleByPage>
<DataType>1091</DataType>
<GatherRuleType>5031</GatherRuleType>
<XPath>
</XPath>
<NodePrty>
</NodePrty>
<StartFlag>产品属性:&lt;/h3&gt;</StartFlag>
<EndFlag>&lt;span class="blank20"&gt;&lt;/span&gt;&lt;h3&gt;</EndFlag>
<LimitSign>2001</LimitSign>
<RegionExpression>
</RegionExpression>
<IsMergeData>False</IsMergeData>
<NavLevel>0</NavLevel>
<MultiPageName>
</MultiPageName>
<DownloadFileSavePath>
</DownloadFileSavePath>
<DownloadFileDealType>
</DownloadFileDealType>
<IsOcrText>False</IsOcrText>
<OcrScale>0</OcrScale>
<IsAutoDownloadImage>False</IsAutoDownloadImage>
<ExportRules>
<ExportRule>
<ExortField>产品属性</ExortField>
<ExortRuleType>2041</ExortRuleType>
<ExortRuleCondition>
</ExortRuleCondition>
</ExportRule>
</ExportRules>
</GatherRule>
</GatherRules>
</Task>