C# 批量读取 Word 文档并将文字保存到 txt,同时可以保存其中的图片

15 篇文章 0 订阅
这个C#代码示例展示了如何使用多线程异步读取Word文档的内容和图片,通过调用UtilsDocument.GetWordImageSync方法同步提取图片,并使用ReadWPSContent方法读取文档文本。代码还提供了选择单个文件或整个目录下文件进行处理的功能。
摘要由CSDN通过智能技术生成

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Word;

namespace WindowsFormsApp2
{
    /// <summary>
    /// 主窗体类(用于获取word文档中的文字和图片)
    /// </summary>
    public partial class frmMain : Form
    {
        /// <summary>
        /// Background worker that reads a document's content asynchronously.
        /// Created in <c>FrmMain_Load</c> and started from <c>btnChooseFile_Click</c>.
        /// </summary>
        private BackgroundWorker _readDocWorker = null;

        /// <summary>
        /// Path of the document being processed. Set from the file dialog and
        /// reassigned for each file during batch conversion.
        /// </summary>
        private string _docPath = string.Empty;
        // Folder most recently picked in btnDirectChoose_Click.
        private string _dirPath = string.Empty;

        /// <summary>
        /// Absolute paths of the files collected by <c>GetFilename</c>; iterated by <c>conversion</c>.
        /// </summary>
        List<string> ListOfName = new List<string>();

 


        /// <summary>
        /// 窗体加载事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void FrmMain_Load(object sender, EventArgs e)
        {
            _readDocWorker = new BackgroundWorker();
            _readDocWorker.DoWork += _readDocWorker_DoWork;
            _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;             
        }


        /// <summary>
        /// 测试多线程处理
        /// </summary>
        /// <param name="filename"></param>
        private void ReadDoc(string filename)
        {
            if (File.Exists(filename))
            {
                BackgroundWorker _readDocWorker = new BackgroundWorker();
               
                _readDocWorker.DoWork += _readDocWorker_DoWork;
                _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
               
                _readDocWorker.RunWorkerAsync(filename);
            }

        }
        /// <summary>
        /// 选择文档按钮点击事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnChooseFile_Click(object sender, EventArgs e)
        {
            var openfile = new OpenFileDialog();

            openfile.Filter = "文档(*.doc;*.docx)|*.doc;*.docx";
            openfile.Title = "请选择文档";

            if (openfile.ShowDialog() == DialogResult.OK)
            {
                _docPath = openfile.FileName;
                this.richTxtBox.Text = "正在加载。。。";
                this.btnChooseFile.Enabled = false;

                _readDocWorker.RunWorkerAsync();
            }
            else
            {
                this.richTxtBox.Text = "请选择文档";
            }
        }

        /// <summary>
        /// 读取文档内容事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);
            
            var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

            //从文档中同步提取图片

            UtilsDocument.GetWordImageSync(_docPath, imgName);

            //读取文档中的文本内容

            var content = ReadWPSContent(_docPath);

            if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
            {
                StringBuilder sb = new StringBuilder(content);
                var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                StreamWriter sw = new StreamWriter(fs);

                sw.Write(content);

                sw.Close();
                fs.Close();
            }

            e.Result = content;

            Thread.Sleep(10000);
        }

        /// <summary>
        /// 读取文档内容完成事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void _readDocWorker_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {
            var content = (string)e.Result;

            if (!string.IsNullOrEmpty(content))
            {
                this.richTxtBox.Text = content;

                #region 显示进度
                try
                {
                    int i = Int16.Parse(progressBar.Tag.ToString());
                   
                    if (i != 0)
                    {
                        progressBar.Value += 100 / i;

                        num_lbl.Text = progressBar.Value.ToString();
                    }
                    else
                    {
                        num_lbl.Text = "0/0";
                    }
                }
                catch (Exception ee)
                {

                }
                #endregion

            }
            else
            {
                this.richTxtBox.Text = "读取失败";
            }

            this.btnChooseFile.Enabled = true;
        }

        /// <summary>
        /// 构造函数
        /// </summary>
        public frmMain()
        {
            InitializeComponent();

            this.Load += FrmMain_Load;
        }

        /// <summary>
        /// 读取WPS文档的内容(这里用的是WPS的API)
        /// </summary>
        /// <param name="docPath"></param>
        private string ReadWPSContent(string docPath)
        {
            //定义Word实例和文档实例

            var word = new Word.Application();
            var doc = new Word.Document();
            var txtContent = string.Empty;

            try
            {
                //设置打开文档的参数,这里是只读打开

                object name = docPath;
                object Range = System.Reflection.Missing.Value;
                object unknow = Type.Missing;
                object isReadOnly = true;

                //打开给定目录的文档

                word.Visible = false;

                doc = word.Documents.Open(ref name, ref unknow, ref isReadOnly, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow);

                //全选文档中的数据并复制到剪切板

                doc.ActiveWindow.Selection.WholeStory();
                doc.ActiveWindow.Selection.Copy();

                //获取当前剪贴板上的数据

                IDataObject data = null;

                if (this.InvokeRequired)
                {
                    this.Invoke((Action)delegate
                    {
                        data = Clipboard.GetDataObject();
                    });
                }
                else
                {
                    data = Clipboard.GetDataObject();
                }

                if (data != null)
                {
                    //获取文本类型数据

                    if (data.GetDataPresent(DataFormats.Text))
                    {
                        txtContent = (string)data.GetData(DataFormats.Text);
                    }
                    else
                    {
                        txtContent = string.Empty;
                    }
                }
                else
                {
                    txtContent = string.Empty;
                }
            }
            catch (Exception exc)
            {
                txtContent = string.Empty;
            }
            finally
            {
                if (doc != null)
                {
                    doc.Close();
                    doc = null;
                }

                if (word != null)
                {
                    word.Quit();
                    word = null;
                }
            }

            return txtContent;
        }

        /// <summary>
        /// 获取目录下的文件名称按钮
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void btnDirectChoose_Click(object sender, EventArgs e)
        {
            FolderBrowserDialog fbd = new FolderBrowserDialog();
            
            fbd.SelectedPath = "D:\\003、历史项目\\219、业绩考评系统\\项目文档\\2019、2020年司法档案工作\\2019-完整数据";

            fbd.SelectedPath = "C:\\Users\\HUAWEI\\Desktop\\test";
            DialogResult result = fbd.ShowDialog();
            progressBar.Tag = 0;

            if (result == DialogResult.OK && !string.IsNullOrWhiteSpace(fbd.SelectedPath))
            {
                _dirPath = fbd.SelectedPath;
                string[] files = GetFilename(_dirPath);
                if (null != files)
                {
                    for(int i = 0; i < files.Length; i++)
                    {
                        FileListBox.Items.Add(files[i]);
                        //ReadDoc(files[i]);
                        //break;
                    }
                }
                progressBar.Tag = files.Length;
                
            }

            conversion();

            //MessageBox.Show(_dirPath);
        }
        
        /// <summary>
        /// 获取目录下的所有的文件列表
        /// </summary>
        /// <param name="_dirPath"></param>
        /// <returns></returns>
        private string [] GetFilename (string _dirPath)
        {
            string[] files =null;
            
            DirectoryInfo dire = new DirectoryInfo(_dirPath);
            FileInfo[] fileinfo = dire.GetFiles();
            
            files = new string[fileinfo.Length];

            for (int i = 0; i < fileinfo.Length; i++)
            {
                files[i]=fileinfo[i].FullName;
                ListOfName.Add(fileinfo[i].FullName);
            }
            return files;
        }

        /// <summary>
        /// 批量转换
        /// </summary>
        private void conversion()
        {
            for (int i = 0; i < ListOfName.Count; i++)
            {
                _docPath = ListOfName[i];

                /*
                if (i == 0)
                {
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                else
                {
                    _readDocWorker = null;
                    _readDocWorker = new BackgroundWorker();
                    _readDocWorker.DoWork += _readDocWorker_DoWork;
                    _readDocWorker.RunWorkerCompleted += _readDocWorker_RunWorkerCompleted;
                }
                


                _readDocWorker.RunWorkerAsync();
                */

                var deskPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop);

                var imgName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath);

                //从文档中同步提取图片

                UtilsDocument.GetWordImageSync(_docPath, imgName);

                //读取文档中的文本内容

                var content = ReadWPSContent(_docPath);

                if (!string.IsNullOrEmpty(content) && !string.IsNullOrEmpty(_docPath))
                {
                    StringBuilder sb = new StringBuilder(content);
                    var txtName = deskPath + @"\" + Path.GetFileNameWithoutExtension(_docPath) + "(解析).txt";

                    FileStream fs = new FileStream(txtName, FileMode.OpenOrCreate, FileAccess.ReadWrite);
                    StreamWriter sw = new StreamWriter(fs);

                    sw.Write(content);

                    sw.Close();
                    fs.Close();
                }


            }
                
        }

    }
}

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
在 C# 中,可以将图片保存到 SQL 数据库,也可以从 SQL 数据库中读取图片。下面是一个简单的示例:

保存图片到 SQL 数据库:

```csharp
// 假设您已经将图片存储在了 byte[] buffer 中,并且有一个名为 "Images" 的表来存储图片

// 创建一个 SqlConnection 对象
using (var connection = new SqlConnection("your_connection_string"))
{
    // 打开连接
    connection.Open();

    // 创建一个 SqlCommand 对象
    using (var command = new SqlCommand("INSERT INTO Images (Image) VALUES (@Image)", connection))
    {
        // 将图片数据添加到参数集合中
        command.Parameters.Add("@Image", SqlDbType.Image).Value = buffer;

        // 执行命令
        command.ExecuteNonQuery();
    }
}
```

从 SQL 数据库读取图片:

```csharp
// 假设您要从 "Images" 表中获取图片数据并显示在 PictureBox 控件中

// 创建一个 SqlConnection 对象
using (var connection = new SqlConnection("your_connection_string"))
{
    // 打开连接
    connection.Open();

    // 创建一个 SqlCommand 对象
    using (var command = new SqlCommand("SELECT Image FROM Images WHERE Id = @Id", connection))
    {
        // 添加参数
        command.Parameters.Add("@Id", SqlDbType.Int).Value = 1; // 假设您要获取 Id 为 1 的图片

        // 执行查询并获取 SqlDataReader 对象
        using (var reader = command.ExecuteReader())
        {
            // 读取数据
            if (reader.Read())
            {
                // 获取图片数据
                var buffer = (byte[])reader["Image"];

                // 创建一个 MemoryStream 对象
                using (var stream = new MemoryStream(buffer))
                {
                    // 使用 Image.FromStream 方法将 buffer 转换为一个 Image 对象
                    var image = Image.FromStream(stream);

                    // 将 Image 对象显示在 PictureBox 控件中
                    pictureBox1.Image = image;
                }
            }
        }
    }
}
```

请注意,这只是一个简单的示例,实际实现可能会因为数据格式、表结构等因素而有所不同。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值