C# 读取 Excel、Word、PPT 和非扫描版 PDF 的内容

测试模板

// Test harness: lets the user pick one or more files of any type and then
// runs the test code once per selected file.
OpenFileDialog dialog = new OpenFileDialog
{
    InitialDirectory = "",
    Filter = "文件|*.*",
    Multiselect = true
};

// Nothing to process when the dialog is dismissed without confirming.
if (dialog.ShowDialog() != DialogResult.OK)
{
    return;
}

// Paths of the selected source files.
string[] files = dialog.FileNames;

foreach (var file in files)
{
    // Add the test code here
}

// Keep the console window open until Enter is pressed.
Console.ReadLine();

读取EXCEL

NuGet中导入NPOI

// Prints the contents of every sheet of an Excel workbook using NPOI:
// one console line per row, cells separated by a single space.
// Meant to be placed inside the `foreach (var file in files)` loop; files
// whose extension is neither .xls nor .xlsx are skipped with `continue`.
string extension = Path.GetExtension(file).ToUpperInvariant();
if (extension != ".XLS" && extension != ".XLSX")
{
    // Decide before opening the file so no stream is created for files we skip.
    continue;
}

// The using block guarantees the stream is closed even if parsing throws.
using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
{
    // .xls is the Excel 97-2003 binary format (HSSFWorkbook);
    // .xlsx is the Excel 2007+ format (XSSFWorkbook).
    IWorkbook workbook;
    if (extension == ".XLS")
    {
        workbook = new HSSFWorkbook(fs);
    }
    else
    {
        workbook = new XSSFWorkbook(fs);
    }

    for (int index = 0; index < workbook.NumberOfSheets; index++)
    {
        ISheet sheet = workbook.GetSheetAt(index);
        for (int i = sheet.FirstRowNum; i <= sheet.LastRowNum; i++)
        {
            IRow row = sheet.GetRow(i);

            // GetRow returns null for rows that were never created (blank rows
            // in the middle of a sheet). Such a row is printed as an empty line
            // instead of ending the sheet, so the rows after it are still read.
            if (row != null)
            {
                for (int j = row.FirstCellNum; j < row.LastCellNum; j++)
                {
                    ICell cell = row.GetCell(j);

                    // Cells that do not exist are skipped (nothing is printed for them).
                    if (cell != null)
                    {
                        Console.Write(cell.ToString() + " ");
                    }
                }
            }

            Console.Write("\n");
        }
    }
}

读取PPT

NuGet导入Spire.Presentation

// Loads a presentation with Spire.Presentation and prints the text of every
// paragraph found in the auto shapes of each slide, one paragraph per line.
// Any exception raised while loading or reading is reported by printing its message.
Presentation ppt = new Presentation();
try
{
    ppt.LoadFromFile(file);
    foreach (ISlide slide in ppt.Slides)
    {
        foreach (Spire.Presentation.IShape shape in slide.Shapes)
        {
            // Only auto shapes expose a text frame; other shape kinds are skipped.
            IAutoShape autoShape = shape as IAutoShape;
            if (autoShape == null || autoShape.TextFrame == null)
            {
                continue;
            }

            foreach (TextParagraph tp in autoShape.TextFrame.Paragraphs)
            {
                Console.WriteLine(tp.Text);
            }
        }
    }
}
catch (Exception ex)
{
    Console.WriteLine(ex.Message);
}
finally
{
    // Release the resources held by the presentation; merely setting the
    // variable to null would not free anything.
    ppt.Dispose();
}

读取Word

引用 DotMaysWind.Office.dll(来自 GitHub 项目 Simple Office Reader)

// Opens the file through Simple Office Reader (DotMaysWind.Office) and prints
// the paragraph text of a Word document.
// The `as` cast yields null when the factory result is not an IWordFile, so the
// result is checked before use; such files are skipped without output.
IWordFile document = OfficeFileFactory.CreateOfficeFile(file) as IWordFile;
if (document != null)
{
    string context = document.ParagraphText;
    Console.WriteLine(context);
}

读取PDF(非扫描版)

NuGet导入Spire.Pdf

// Loads a (non-scanned) PDF with Spire.Pdf and prints the extracted text of
// each page in order.
// Any exception raised while loading or extracting is reported by printing its message.
PdfDocument pdf = new PdfDocument();
try
{
    pdf.LoadFromFile(file);
    foreach (PdfPageBase page in pdf.Pages)
    {
        string context = page.ExtractText();
        Console.WriteLine(context);
    }
}
catch (Exception ex)
{
    Console.WriteLine(ex.Message);
}
finally
{
    // Close the document to release its resources; merely setting the
    // variable to null would not free anything.
    pdf.Close();
}

参考资料

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值