淘宝网有一个淘宝助理,可以方便地将淘宝店的商品资源导出成csv格式的数据包。很多商城系统为了能快速录入商品,都会要求开发者能最大限度地利用淘宝数据包直接导入产品数据。最近正好有这样一个需求,就研究了一下,记录下来~
大致思路是这样的,将淘宝的数据包上传到自己的商城中,然后解压,读取里面的csv文件,把数据导入到自己的商城中,然后读取淘宝的图片,对图片进行处理,转换成自己商城合适的格式和尺寸
一、直接分析csv,转化为DataTable,依次导入到自己的库中
1.淘宝的csv数据包是用"\t"作为字段间的分隔符,每行数据是用"\n"作为行分隔符
2.要注意的是:宝贝描述(html代码)本身也会包含换行符号,不过不是"\n",而是"\r\n"---幸好是这样,不然的话,宝贝描述本身的换行符与数据每行的分隔符混在一起,就很难区分了.
下面是关键示例代码:
1.上传压缩包
/// <summary>
/// Saves an uploaded zip package below the configured upload root and, optionally,
/// extracts it and locates the products.csv file inside.
/// </summary>
/// <param name="zipFile">Name of the request file field that carries the upload.</param>
/// <param name="path">Sub-directory, relative to the configured upload root, to store the package in.</param>
/// <param name="Unzip">
/// When true the package is extracted next to itself, the zip file is deleted and the
/// extraction directory is remembered in the session under the "zipDir" key.
/// </param>
/// <returns>
/// The virtual path of the saved zip (or of the extracted products.csv when
/// <paramref name="Unzip"/> is true), a user-facing error message when validation or
/// extraction fails, or an empty string when no session is available.
/// </returns>
[HttpPost]
public static string UploadPackage(string zipFile, string path, bool Unzip = false)
{
    // Upper bound on the accepted package size, in bytes.
    const int maxPackageBytes = 40960 * 1024;

    var context = HttpContext.Current;
    var uploadFile = context.Request.Files[zipFile];
    var session = context.Session;
    var server = context.Server;

    if (uploadFile == null)
    {
        return "请选择上传文件!!";
    }

    var extension = Path.GetExtension(uploadFile.FileName) ?? string.Empty;
    // Ordinal comparison: ToLower() is culture-sensitive and mis-compares ".ZIP" under e.g. Turkish culture.
    if (!string.Equals(extension, ".zip", StringComparison.OrdinalIgnoreCase))
    {
        return "请上传zip格式的压缩文件!";
    }

    if (uploadFile.ContentLength >= maxPackageBytes)
    {
        return "文件大小超过指字限度!";
    }

    // Everything below depends on the session; bail out once instead of dereferencing it
    // first and null-checking it afterwards.
    if (session == null)
    {
        return string.Empty;
    }

    // Remove the directory left behind by a previous upload in this session.
    var previousDir = session["zipDir"];
    if (previousDir != null)
    {
        var previousPhysicalDir = server.MapPath(previousDir.ToString());
        if (Directory.Exists(previousPhysicalDir))
        {
            Directory.Delete(previousPhysicalDir, true);
        }
    }

    var sysPath = ConfigManager.SystemConfig.图片上传路径;
    // GetFileNameWithoutExtension also strips any directory part sent by the client.
    var fileName = Path.GetFileNameWithoutExtension(uploadFile.FileName);
    var dir = sysPath + "/" + path + "/";
    var filePath = dir + fileName + extension;

    var physicalDir = server.MapPath(dir);
    if (!Directory.Exists(physicalDir))
    {
        Directory.CreateDirectory(physicalDir);
    }

    var zipPhysicalPath = server.MapPath(filePath);
    uploadFile.SaveAs(zipPhysicalPath);

    if (!Unzip)
    {
        return filePath;
    }

    var extractDir = dir + fileName;
    var extractPhysicalDir = server.MapPath(extractDir);

    string err;
    bool zipResult = UnZipFile(zipPhysicalPath, extractPhysicalDir, out err);

    // The archive is no longer needed once extraction has been attempted.
    File.Delete(zipPhysicalPath);
    session["zipDir"] = extractDir;

    if (!zipResult)
    {
        return "不是有效的zip文件格式,解压失败!";
    }

    if (File.Exists(extractPhysicalDir + "/products.csv"))
    {
        return extractDir + "/products.csv";
    }

    return "压缩包中缺少products.csv文件!";
}
2.解压数据包
/// <summary>
/// Extracts every entry of a zip archive into the given directory.
/// </summary>
/// <param name="zipFilePath">Physical path of the zip archive.</param>
/// <param name="unZipDir">Physical path of the directory to extract into; created when missing.</param>
/// <param name="err">Receives an error description when the method returns false; empty otherwise.</param>
/// <returns>True when all entries were extracted; false when validation or extraction failed.</returns>
public static bool UnZipFile(string zipFilePath, string unZipDir, out string err)
{
    err = "";
    if (string.IsNullOrEmpty(zipFilePath))
    {
        err = "压缩文件不能为空!";
        return false;
    }
    if (!File.Exists(zipFilePath))
    {
        err = "压缩文件不存在!";
        return false;
    }
    if (string.IsNullOrEmpty(unZipDir))
    {
        err = "解压目录不能为空!";
        return false;
    }
    if (!unZipDir.EndsWith("\\", StringComparison.Ordinal))
    {
        unZipDir += "\\";
    }
    if (!Directory.Exists(unZipDir))
    {
        Directory.CreateDirectory(unZipDir);
    }
    try
    {
        // Canonical extraction root, trailing separator included, used for the containment check below.
        var rootPath = Path.GetFullPath(unZipDir);

        using (var s = new ZipInputStream(System.IO.File.OpenRead(zipFilePath)))
        {
            ZipEntry theEntry;
            while ((theEntry = s.GetNextEntry()) != null)
            {
                // Entry names come from an uploaded archive: reject any entry whose resolved
                // path escapes the extraction root ("..\" segments or rooted names).
                var targetPath = Path.GetFullPath(Path.Combine(rootPath, theEntry.Name));
                if (!targetPath.StartsWith(rootPath, StringComparison.OrdinalIgnoreCase))
                {
                    err = "压缩包中包含非法路径!";
                    return false;
                }

                string fileName = Path.GetFileName(theEntry.Name);
                if (string.IsNullOrEmpty(fileName))
                {
                    // Pure directory entry: just make sure the directory exists.
                    Directory.CreateDirectory(targetPath);
                    continue;
                }

                var targetDir = Path.GetDirectoryName(targetPath);
                if (!string.IsNullOrEmpty(targetDir))
                {
                    Directory.CreateDirectory(targetDir);
                }

                using (FileStream streamWriter = File.Create(targetPath))
                {
                    var data = new byte[2048];
                    int size;
                    // Read returns 0 (or less) once the current entry is exhausted.
                    while ((size = s.Read(data, 0, data.Length)) > 0)
                    {
                        streamWriter.Write(data, 0, size);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Any failure (corrupt archive, I/O error, invalid entry name) is reported through err.
        err = ex.Message;
        return false;
    }
    return true;
}
3.对解压出的csv文件进行处理
/// <summary>
/// Reads a tab-separated Taobao export file into a <see cref="DataTable"/>.
/// </summary>
/// <remarks>
/// The first two lines of the file are skipped, the third line supplies the column
/// names and every following line becomes one row. Values have doubled quotes
/// collapsed and leading/trailing quote characters removed. A line with fewer fields
/// than the header leaves the remaining cells at their default (DBNull) value.
/// NOTE(review): ReadLine also breaks on "\r\n" and "\r", so a field containing such a
/// line break is split across rows — confirm against real export files.
/// </remarks>
/// <param name="fileName">Physical path of the file to read.</param>
/// <returns>The populated table; it has no columns when the file has fewer than three lines.</returns>
public static DataTable ReadCsvFromTb(string fileName)
{
    var dt = new DataTable();

    // using blocks release the file handle even when parsing throws.
    using (var fs = new FileStream(fileName, FileMode.Open, FileAccess.Read))
    using (var sr = new StreamReader(fs, Encoding.Default))
    {
        string strLine;
        int columnCount = 0, rowNum = 0;
        var isTitle = true;

        while ((strLine = sr.ReadLine()) != null)
        {
            // Skip the two lines that precede the header line.
            if (rowNum < 2)
            {
                rowNum++;
                continue;
            }

            var aryLine = strLine.Split('\t');
            if (isTitle)
            {
                isTitle = false;
                columnCount = aryLine.Length;
                for (var i = 0; i < columnCount; i++)
                {
                    dt.Columns.Add(new DataColumn(aryLine[i]));
                }
                continue;
            }

            var dr = dt.NewRow();
            // Never index past the fields actually present on this line.
            var fieldCount = Math.Min(columnCount, aryLine.Length);
            for (var j = 0; j < fieldCount; j++)
            {
                dr[j] = aryLine[j].Replace("\"\"", "\"").TrimStart('"').TrimEnd('"');
            }
            dt.Rows.Add(dr);
        }
    }

    return dt;
}
4.导入淘宝数据包数据到自己的库中(下面是我自己的系统中用到的,根据自己的项目,做不同改动)
/// <summary>
/// Extracts a Taobao data package and imports the products listed in its products.csv.
/// </summary>
/// <param name="zipPath">Virtual path of the uploaded zip package.</param>
/// <param name="area">Area id passed through to the saved goods.</param>
/// <param name="type">Category id (GUID string) passed through to the saved goods.</param>
/// <param name="sellerType">Seller category id (GUID string) passed through to the saved goods.</param>
/// <param name="brand">Brand passed through to the saved goods.</param>
/// <param name="deleteZip">When true the zip file and the extraction directory are removed afterwards.</param>
/// <returns>A user-facing message describing the outcome.</returns>
public string ImportTbData(string zipPath, string area, string type, string sellerType, string brand, bool deleteZip = true)
{
    var msg = "";

    // The extraction directory is the zip path without its extension; a path without
    // any '.' cannot be a package and would otherwise make Substring throw.
    var extensionIndex = zipPath.LastIndexOf('.');
    if (extensionIndex < 0)
    {
        return "不是有效的zip文件格式,导入数据包失败!";
    }

    var unZipDir = zipPath.Substring(0, extensionIndex);
    var zipPhysicalPath = Server.MapPath(zipPath);
    var unZipPhysicalDir = Server.MapPath(unZipDir);

    try
    {
        if (FileHelper.UnZipFile(zipPhysicalPath, unZipPhysicalDir, out msg))
        {
            var csvPhysicalPath = Server.MapPath(unZipDir + "/products.csv");
            if (System.IO.File.Exists(csvPhysicalPath))
            {
                var taobaoList = new GoodsManager()
                    .GetTaoBaoData(CsvHelper.ReadCsvFromTb(csvPhysicalPath))
                    .Select(tb => new SellerTaoBaoGoodsViewModel(tb))
                    .ToList();
                msg = SaveTbDataToLocalData(taobaoList, area, type, sellerType, brand, unZipDir);
            }
            else
            {
                msg = "压缩包中缺少products.csv文件!";
            }
        }
        else
        {
            msg = "不是有效的zip文件格式,导入数据包失败!";
        }
    }
    finally
    {
        // Clean up even when the import throws, so failed packages do not pile up on disk.
        if (deleteZip)
        {
            System.IO.File.Delete(zipPhysicalPath);
            // Directory.Delete needs the physical path, and the directory may not exist
            // when extraction failed early.
            if (System.IO.Directory.Exists(unZipPhysicalDir))
            {
                System.IO.Directory.Delete(unZipPhysicalDir, true);
            }
        }
    }

    return msg;
}
/// <summary>
/// Converts the images of each imported Taobao product and stores the product as a local goods record.
/// </summary>
/// <param name="list">Products read from the Taobao package.</param>
/// <param name="area">Area id assigned to every saved record.</param>
/// <param name="type">Category id (GUID string) assigned to every saved record.</param>
/// <param name="sellerType">Seller category id (GUID string); empty or null maps to an empty GUID.</param>
/// <param name="brand">Brand assigned to every saved record.</param>
/// <param name="unZipDir">Virtual path of the directory the package was extracted to.</param>
/// <returns>A message stating how many products were actually saved.</returns>
public string SaveTbDataToLocalData(List<SellerTaoBaoGoodsViewModel> list, string area, string type, string sellerType, string brand, string unZipDir)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var importedCount = 0;

    foreach (var item in list)
    {
        // CopyTbImageToLocalImage hands its results over through the session; start from a
        // clean slate so leftovers from an aborted run are not attributed to this product.
        Session["smallImgListTb"] = null;
        Session["middleImgListTb"] = null;
        Session["bigImgListTb"] = null;

        if (!CopyTbImageToLocalImage(item.图片列表, unZipDir))
        {
            continue;
        }

        var smallImgList = TakeSessionImageList("smallImgListTb");
        var middleImgList = TakeSessionImageList("middleImgListTb");
        var bigImgList = TakeSessionImageList("bigImgListTb");

        // Package data is machine-written, so parse numbers independently of the server culture.
        var price = Math.Round(double.Parse(item.单价, invariant), 4);

        var goods = new Goods
        {
            ID = Guid.NewGuid(),
            店铺ID = Seller.ID,
            地区ID = area,
            分类ID = Guid.Parse(type),
            卖家分类ID = string.IsNullOrEmpty(sellerType) ? Guid.Empty : Guid.Parse(sellerType),
            名称 = item.名称,
            品牌 = brand,
            小图 = string.Join(",", smallImgList),
            中图 = string.Join(",", middleImgList),
            大图 = string.Join(",", bigImgList),
            单价 = price,
            折扣价 = price,
            库存量 = int.Parse(item.库存量, invariant),
            编码 = item.编码,
            详细介绍 = (item.详细介绍 ?? string.Empty).Replace("'", ""),
            有无发票 = item.有无发票,
            退货承诺 = item.退货承诺,
            总销量 = 0,
            是否上架 = YesNo.是,
            是否精选 = item.是否精选,
            登记时间 = DateTime.Now
        };
        new GoodsManager().AddGoods(goods);
        importedCount++;
    }

    // Report what was really saved, not the size of the input list.
    return string.Format("本次共成功导入{0}条数据!", importedCount);
}

// Reads the image list stored under the given session key, clears the key and never returns null.
private List<string> TakeSessionImageList(string key)
{
    var images = Session[key] as List<string>;
    Session[key] = null;
    return images ?? new List<string>();
}
二、数据包中图片处理
1.从csv文件中获取到淘宝数据包中的tbi图片
/// <summary>
/// View model holding the fields of one Taobao product that are used by the import.
/// </summary>
public class SellerTaoBaoGoodsViewModel
{
    /// <summary>Product name.</summary>
    public string 名称 { get; set; }

    /// <summary>Unit price, kept as the raw text from the package.</summary>
    public string 单价 { get; set; }

    /// <summary>Stock quantity, kept as the raw text from the package.</summary>
    public string 库存量 { get; set; }

    /// <summary>Detailed description.</summary>
    public string 详细介绍 { get; set; }

    /// <summary>Merchant code.</summary>
    public string 编码 { get; set; }

    /// <summary>Whether an invoice is provided.</summary>
    public YesNo 有无发票 { get; set; }

    /// <summary>Whether the product is featured.</summary>
    public YesNo 是否精选 { get; set; }

    /// <summary>Whether a return commitment applies.</summary>
    public YesNo 退货承诺 { get; set; }

    /// <summary>File names (with the ".tbi" extension) of the product images.</summary>
    public List<string> 图片列表 { get; set; }

    /// <summary>
    /// Copies the relevant fields from a parsed Taobao product and derives the image file names.
    /// </summary>
    /// <param name="taoBaoGoods">Product parsed from the package.</param>
    public SellerTaoBaoGoodsViewModel(TaoBaoGoods taoBaoGoods)
    {
        名称 = taoBaoGoods.宝贝名称;
        单价 = taoBaoGoods.宝贝价格;
        库存量 = taoBaoGoods.宝贝数量;
        详细介绍 = taoBaoGoods.宝贝描述;
        编码 = taoBaoGoods.商家编码;
        有无发票 = taoBaoGoods.发票;
        是否精选 = taoBaoGoods.橱窗推荐;
        退货承诺 = taoBaoGoods.退换货承诺;

        // Small transformation of the picture field: each ';'-separated segment carries a
        // ":1:<index>:|" suffix, which is replaced by the ".tbi" extension of the image file.
        // Split once instead of on every loop pass; a missing field yields an empty list.
        var segments = (taoBaoGoods.新图片 ?? string.Empty).Split(';');
        var list = new List<string>();
        // The segment after the last ';' is skipped, as before.
        // NOTE(review): presumably the field always ends with ';' — confirm.
        for (var i = 0; i < segments.Length - 1; i++)
        {
            list.Add(segments[i].Replace(string.Format(":1:{0}:|", i), ".tbi"));
        }
        图片列表 = list;
    }
}
2.处理取到的tbi图片,转换为自己用到的图片格式
/// <summary>
/// Converts the .tbi images of one product into local goods images and collects the
/// resulting paths in the session lists "smallImgListTb", "middleImgListTb" and "bigImgListTb".
/// </summary>
/// <param name="list">Image file names, relative to the package's "products" directory.</param>
/// <param name="unZipDir">Virtual path of the directory the package was extracted to.</param>
/// <returns>Always true; failures surface as exceptions.</returns>
public bool CopyTbImageToLocalImage(List<string> list, string unZipDir)
{
    if (list == null)
    {
        // Nothing to convert.
        return true;
    }

    var imgDir = unZipDir + "/products/";
    foreach (var img in list)
    {
        string imgPath;
        // Dispose the stream right after conversion; an open handle would keep the
        // extracted file locked and block later deletion of the directory.
        using (var stream = new FileStream(Server.MapPath(imgDir + img), FileMode.Open))
        {
            imgPath = ImageHelper.CopyToGoodsImg(stream);
        }

        if (string.IsNullOrEmpty(imgPath))
        {
            continue;
        }

        // Indices 0, 1 and 2 are read below (small, middle and big image paths);
        // skip results that do not carry all three.
        var imgPathList = imgPath.Split(new[] { ':' });
        if (imgPathList.Length < 3)
        {
            continue;
        }

        AppendToSessionList("smallImgListTb", imgPathList[0]);
        AppendToSessionList("middleImgListTb", imgPathList[1]);
        AppendToSessionList("bigImgListTb", imgPathList[2]);
    }
    return true;
}

// Appends a value to the string list stored under the given session key, creating the list on first use.
private void AppendToSessionList(string key, string value)
{
    var items = Session[key] as List<string>;
    if (items == null)
    {
        items = new List<string>();
    }
    items.Add(value);
    Session[key] = items;
}
到此,一个导入就算成功了~