C# 读取 Word 文档内容

该代码段展示了如何使用 C# 通过 Microsoft.Office.Interop.Word 库遍历指定文件夹下的所有 .doc 文件,读取文档全文并提取特定字符串(例如"地籍号:"与"权利人"之间的文本,以及两个"身份证号"之间的文本)。程序随后以提取到的文本拼接成新的文件名,将原文档复制到所选文件夹下的 bak 备份目录中。此代码适用于批量处理和信息提取场景。
摘要由CSDN通过智能技术生成
using Microsoft.Office.Interop.Word;
        /// <summary>
        /// Returns the text located between a start marker and the nearest end marker
        /// that follows it.
        /// </summary>
        /// <param name="source">Text to search.</param>
        /// <param name="startStr">Marker that immediately precedes the wanted text; matched literally.</param>
        /// <param name="endStr">Marker that immediately follows the wanted text; matched literally.</param>
        /// <returns>
        /// The text between the first usable pair of markers, or an empty string when
        /// no such pair is found.
        /// </returns>
        public string SubstringSingle(string source, string startStr, string endStr)
        {
            // The markers are plain text, not patterns: escape them so characters such as
            // '(', '.', '[' or '?' are matched literally instead of corrupting the regex.
            string start = Regex.Escape(startStr ?? string.Empty);
            string end = Regex.Escape(endStr ?? string.Empty);

            // [\s\S] matches any character including line breaks, so no RegexOptions are needed;
            // the lazy quantifier stops at the first end marker after the start marker.
            string pattern = "(?<=" + start + ")[\\s\\S]*?(?=" + end + ")";
            return Regex.Match(source, pattern).Value;
        }
 /// <summary>
        /// Click handler: asks the user for a folder, reads every top-level "*.doc" file in it
        /// through Word automation and copies each file into a "bak" sub-folder, naming the copy
        /// after the text extracted from the document (see <see cref="BuildBackupFileName"/>).
        /// Files that cannot be processed are listed in the final message box.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            string foldPath;
            using (FolderBrowserDialog dialog = new FolderBrowserDialog())
            {
                dialog.Description = "请选择文件路径";
                if (dialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                foldPath = dialog.SelectedPath;
            }

            // NOTE(review): on .NET Framework a three-character extension pattern such as "*.doc"
            // can also match longer extensions (e.g. ".docx") - confirm whether that is wanted.
            FileInfo[] docFiles = new DirectoryInfo(foldPath).GetFiles("*.doc", SearchOption.TopDirectoryOnly);
            List<string> failures = new List<string>();

            if (docFiles.Length > 0)
            {
                // CreateDirectory is a no-op when the folder already exists.
                string backupDir = Path.Combine(foldPath, "bak");
                Directory.CreateDirectory(backupDir);

                // One Word instance serves the whole batch; it is always shut down in the
                // finally block so no hidden WINWORD.EXE process is left behind.
                Microsoft.Office.Interop.Word.Application app = null;
                try
                {
                    app = new Microsoft.Office.Interop.Word.Application();
                    app.Visible = false;

                    foreach (FileInfo file in docFiles)
                    {
                        try
                        {
                            string text = ReadDocumentText(app, file.FullName);
                            if (text == null)
                            {
                                failures.Add(file.Name);
                                continue;
                            }

                            // File.Copy without overwrite: an existing backup with the same
                            // name is reported as a failure rather than replaced.
                            File.Copy(file.FullName, Path.Combine(backupDir, BuildBackupFileName(text)));
                        }
                        catch (Exception ex)
                        {
                            // Keep going with the remaining files, but remember what went wrong.
                            failures.Add(file.Name + ": " + ex.Message);
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Reached when Word itself could not be started or configured.
                    failures.Add("Word: " + ex.Message);
                }
                finally
                {
                    if (app != null)
                    {
                        QuitWord(app);
                    }
                }
            }

            if (failures.Count == 0)
            {
                MessageBox.Show("处理完成!");
            }
            else
            {
                MessageBox.Show("处理完成,但以下文件未能处理:" + Environment.NewLine
                                + string.Join(Environment.NewLine, failures.ToArray()));
            }
        }

        /// <summary>Opens a document in the given Word instance, returns its trimmed full text and always closes it.</summary>
        private static string ReadDocumentText(Microsoft.Office.Interop.Word.Application app, string path)
        {
            object missing = Type.Missing;
            object sourcePath = path;
            Microsoft.Office.Interop.Word.Document doc = null;
            try
            {
                doc = app.Documents.Open(ref sourcePath,
                                         ref missing, ref missing, ref missing, ref missing, ref missing,
                                         ref missing, ref missing, ref missing, ref missing, ref missing,
                                         ref missing, ref missing, ref missing, ref missing, ref missing);
                return doc == null ? null : doc.Content.Text.Trim();
            }
            finally
            {
                if (doc != null)
                {
                    // The document is only read, so close it without saving and without
                    // giving the hidden Word instance a reason to show a save prompt.
                    object doNotSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
                    doc.Close(ref doNotSave, ref missing, ref missing);
                }
            }
        }

        /// <summary>Builds the backup file name "&lt;first&gt;_&lt;second&gt;.doc" from the two text fragments extracted from the document.</summary>
        private string BuildBackupFileName(string text)
        {
            string firstPart = SubstringSingle(text, "地籍号:", "权利人");
            string secondPart = SubstringSingle(text, "身份证号", "身份证号");

            // The extracted text can still carry "\r" / "\a" and other characters that are
            // illegal in file names; strip all of them so File.Copy does not reject the path.
            return RemoveInvalidFileNameChars(firstPart + "_" + secondPart) + ".doc";
        }

        /// <summary>Removes every character that <see cref="Path.GetInvalidFileNameChars"/> reports as invalid.</summary>
        private static string RemoveInvalidFileNameChars(string name)
        {
            return string.Concat(name.Split(Path.GetInvalidFileNameChars()));
        }

        /// <summary>Shuts down the Word instance without saving and releases its COM wrapper.</summary>
        private static void QuitWord(Microsoft.Office.Interop.Word.Application app)
        {
            try
            {
                object missing = Type.Missing;
                object doNotSave = Microsoft.Office.Interop.Word.WdSaveOptions.wdDoNotSaveChanges;
                app.Quit(ref doNotSave, ref missing, ref missing);
            }
            catch (System.Runtime.InteropServices.COMException)
            {
                // Word is already gone or refuses to quit; nothing more can be done from here.
            }
            finally
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            }
        }
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值