C#导入Word转换成HTML再解析出自己想要的数据。

#region 作业导入
        /// <summary>
        /// Request payload sent by the front end when asking the server to parse an uploaded task file.
        /// </summary>
        public class Task_ReqeustParseImportFileModel
        {
            /// <summary>
            /// Template title name; defaults to an empty string.
            /// </summary>
            public string TemplateTitleName { get; set; } = string.Empty;

            /// <summary>
            /// Name of the uploaded file.
            /// </summary>
            public string fileName { get; set; }

            /// <summary>
            /// MD5 value of the uploaded file.
            /// </summary>
            public string fileMd5 { get; set; }

            /// <summary>
            /// Currently unused.
            /// </summary>
            public string param { get; set; }
        }
        /// <summary>
        /// Value returned by the text-processing helper: one parsed question with its answers.
        /// </summary>
        public class Task_Response
        {
            /// <summary>
            /// Question content; starts out as an empty string.
            /// </summary>
            public string Content { get; set; } = string.Empty;

            /// <summary>
            /// Question type code; starts out as 0.
            /// </summary>
            public int Type { get; set; } = 0;

            /// <summary>
            /// Whether the question is objective; starts out as false.
            /// </summary>
            public bool IsObjective { get; set; } = false;

            /// <summary>
            /// Score of the question; starts out as 0.
            /// </summary>
            public double Score { get; set; } = 0;

            /// <summary>
            /// Answers belonging to the question; starts out as an empty list.
            /// </summary>
            public List<xiding.Models.xiding_task_answerModel> Answer { get; set; } = new List<xiding.Models.xiding_task_answerModel>();
        }

        /// <summary>
        /// Converts a previously uploaded Word document to filtered HTML through Word automation
        /// and parses the HTML into a list of task topics.
        /// </summary>
        /// <remarks>
        /// Every question heading in the Word document must contain the template title name
        /// supplied in <paramref name="obj"/>; paragraphs between two headings are handed to
        /// <c>Task_Handle</c> as one question. The uploaded Word file is deleted after a
        /// successful parse and when the template title name is not found in the document.
        /// The intermediate HTML file is always deleted; any companion resources Word writes
        /// next to it are left in place because the returned content links to them.
        /// </remarks>
        /// <param name="obj">Request carrying the template title name, the uploaded file name and its MD5.</param>
        /// <returns>
        /// A result whose <c>Data</c> holds the parsed topics on success, or whose
        /// <c>Message</c> describes the failure when <c>Succeed</c> is false.
        /// </returns>
        [HttpPost]
        [Route("ParseImportTaskWordFile")]
        public BaseReturnModel<List<xiding.Models.xiding_task_topicModel>> ParseImportTaskWordFile([FromBody]Task_ReqeustParseImportFileModel obj)
        {
            // A missing or malformed request body is bound as null.
            if (obj == null || string.IsNullOrEmpty(obj.TemplateTitleName))
            {
                return ImportTaskFailure("模板标题名称必填!");
            }
            var TemplateTitleNameLength = obj.TemplateTitleName.Length;

            if (string.IsNullOrEmpty(obj.fileName) || string.IsNullOrEmpty(obj.fileMd5))
            {
                return ImportTaskFailure(string.Format("未能找到文件{0}", obj.fileName));
            }
            // Stored file name: file MD5 + original extension.
            var f = obj.fileMd5 + FileM.GetFileExt(obj.fileName);
            // Both parts come from the client and the resulting path is deleted later on,
            // so refuse anything that is not a plain file name (no separators, no drive).
            if (f.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
            {
                return ImportTaskFailure(string.Format("未能找到文件{0}", obj.fileName));
            }
            var filePath = PermissionManage.BLLFactory.ServiceFactory.Util.GetUploadFolderPath(f);
            if (!System.IO.File.Exists(filePath))
            {
                return ImportTaskFailure(string.Format("未能找到文件{0}", obj.fileName));
            }

            var task_topicList = new List<xiding.Models.xiding_task_topicModel>();
            try
            {
                // CreateDirectory is a no-op when the folder already exists.
                var htmlUploadPath = PermissionManage.BLLFactory.ServiceFactory.Util.GetUploadFolderPath() + "\\html";
                Directory.CreateDirectory(htmlUploadPath);

                // Timestamp for readability, GUID so that concurrent requests never share a file.
                string filename = DateTime.Now.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture)
                    + "_" + Guid.NewGuid().ToString("N");
                string ConfigPath = Path.Combine(htmlUploadPath, $"{filename}.html");

                try
                {
                    ConvertWordToFilteredHtml(filePath, ConfigPath);

                    // ReadAllText closes the file even when decoding fails.
                    // NOTE(review): Encoding.Default assumes Word wrote the HTML in the server's ANSI code page — confirm.
                    var html = File.ReadAllText(ConfigPath, System.Text.Encoding.Default);
                    var start = html.IndexOf("<p", StringComparison.Ordinal);
                    var stop = start < 0 ? -1 : html.IndexOf("</div>", start, StringComparison.Ordinal);
                    if (stop < 0)
                    {
                        return ImportTaskFailure("未能从文档中解析出题目内容!");
                    }
                    // The part of the HTML that holds the document paragraphs.
                    var Conten = html.Substring(start, stop - start);

                    // A wrong template title name invalidates the upload: remove the Word file as well.
                    if (!Conten.Contains(obj.TemplateTitleName))
                    {
                        File.Delete(filePath);
                        return ImportTaskFailure("请填写正确的模板标题名称!");
                    }

                    Conten = Conten.Replace("\"", "\'");
                    var ipUrl = HttpContext.Current.Request.Url.Host;
                    var Port = HttpContext.Current.Request.Url.Port;
                    // Prefix every src attribute with the public address of the html upload folder.
                    Conten = Conten.Replace("src='", $"src='http://{ipUrl}:{Port}/upload/html/");

                    // Split into paragraphs and give every piece but the first its "\r\n<p" prefix back.
                    var pStrs = Conten.Split(new[] { "\r\n<p" }, StringSplitOptions.None);
                    for (var i = 1; i < pStrs.Length; i++)
                    {
                        pStrs[i] = $"\r\n<p{pStrs[i]}";
                    }
                    var paragraphs = new List<string>(pStrs);

                    // Positions of the question headings, collected by position so that two
                    // identical heading paragraphs are still treated as two questions.
                    var TitleStrs = new List<int>();
                    for (var i = 0; i < paragraphs.Count; i++)
                    {
                        if (paragraphs[i].Contains(obj.TemplateTitleName))
                        {
                            TitleStrs.Add(i);
                        }
                    }

                    for (var i = 0; i < TitleStrs.Count; i++)
                    {
                        // A question runs up to the next heading, or to the end of the document.
                        var NextIndex = i + 1 < TitleStrs.Count ? TitleStrs[i + 1] : paragraphs.Count;
                        var Contentstr = Task_Handle(paragraphs, TitleStrs[i], NextIndex, obj.TemplateTitleName, TemplateTitleNameLength);
                        task_topicList.Add(new xiding.Models.xiding_task_topicModel()
                        {
                            Content = Contentstr.Content,
                            Type = Contentstr.Type,
                            IsObjective = Contentstr.IsObjective,
                            Score = Contentstr.Score,
                            task_answers = Contentstr.Answer,
                        });
                    }

                    File.Delete(filePath);
                }
                finally
                {
                    // Best effort: failing to remove the temporary HTML must not hide the real outcome.
                    try
                    {
                        File.Delete(ConfigPath);
                    }
                    catch (IOException)
                    {
                    }
                    catch (UnauthorizedAccessException)
                    {
                    }
                }
            }
            catch (Exception ex)
            {
                return ImportTaskFailure(ex.Message);
            }
            return new BaseReturnModel<List<xiding.Models.xiding_task_topicModel>>()
            {
                Succeed = true,
                Message = string.Empty,
                Data = task_topicList,
            };
        }

        // Builds the failed result returned by ParseImportTaskWordFile.
        private static BaseReturnModel<List<xiding.Models.xiding_task_topicModel>> ImportTaskFailure(string message)
        {
            return new BaseReturnModel<List<xiding.Models.xiding_task_topicModel>>()
            {
                Succeed = false,
                Message = message,
            };
        }

        // Opens wordPath in Word and saves it as filtered HTML at htmlPath; the document is closed
        // and Word is quit even when opening or saving throws.
        private static void ConvertWordToFilteredHtml(string wordPath, string htmlPath)
        {
            const System.Reflection.BindingFlags invoke = System.Reflection.BindingFlags.InvokeMethod;
            Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
            try
            {
                Microsoft.Office.Interop.Word.Documents docs = word.Documents;
                Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
                    "Open", invoke, null, docs, new Object[] { wordPath, true, true });
                try
                {
                    object saveFileName = htmlPath;
                    doc.GetType().InvokeMember("SaveAs", invoke, null, doc,
                        new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
                }
                finally
                {
                    doc.GetType().InvokeMember("Close", invoke, null, doc, new object[] { null, null, null });
                }
            }
            finally
            {
                word.GetType().InvokeMember("Quit", invoke, null, word, null);
            }
        }

        #region 封装给导入作业接口用的处理函数
        /// <summary>
        /// Parses one question from the paragraphs in the range [<paramref name="index"/>, <paramref name="NextIndex"/>).
        /// The first paragraph is the heading (question type, stem and score); each following
        /// paragraph that carries the answer marker becomes one answer.
        /// </summary>
        /// <param name="pStrs">HTML paragraphs of the whole document.</param>
        /// <param name="index">Index of the heading paragraph of this question.</param>
        /// <param name="NextIndex">Index one past the last paragraph of this question.</param>
        /// <param name="TemplateTitleName">Template title name that precedes the question type in the heading.</param>
        /// <param name="TemplateTitleNameLength">Length of <paramref name="TemplateTitleName"/>.</param>
        /// <returns>The parsed question; an untouched <see cref="Task_Response"/> when the range is empty.</returns>
        /// <exception cref="FormatException">The heading does not contain the expected type, stem or score.</exception>
        public Task_Response Task_Handle(List<string> pStrs, int index, int NextIndex, string TemplateTitleName, int TemplateTitleNameLength)
        {
            const string scoreOpenMarker = "lang=EN-US>";
            const string scoreCloseMarker = "</span>分)";
            const string rightKeyMarker = "(正确答案)";

            var model = new Task_Response();
            var answersList = new List<xiding.Models.xiding_task_answerModel>();
            for (var i = index; i < NextIndex; i++)
            {
                var paragraph = pStrs[i];
                if (i == index)
                {
                    // Ordinal search, matching the ordinal Contains the caller uses to pick headings.
                    var titlePos = paragraph.IndexOf(TemplateTitleName, StringComparison.Ordinal);
                    var typeStart = titlePos + TemplateTitleNameLength;
                    // The type name is 3 characters followed by a 1-character separator.
                    var ContentStart = typeStart + 4;
                    // The stem ends where the trailing "(score)" part opens.
                    var ContentEnd = paragraph.LastIndexOf("(", StringComparison.Ordinal);
                    if (titlePos < 0 || typeStart + 3 > paragraph.Length || ContentEnd < ContentStart)
                    {
                        throw new FormatException("题目格式不正确:无法解析题型和题干");
                    }

                    // An unrecognised type name leaves Type at its initial value.
                    switch (paragraph.Substring(typeStart, 3))
                    {
                        case "单选题":
                            model.Type = 2;
                            break;
                        case "多选题":
                            model.Type = 3;
                            break;
                        case "填空题":
                            model.Type = 1;
                            break;
                        case "问答题":
                            model.Type = 4;
                            break;
                    }
                    model.Content = paragraph.Substring(ContentStart, ContentEnd - ContentStart);

                    // The score is the text between the last "lang=EN-US>" and the closing "</span>分)".
                    var scoreOpen = paragraph.LastIndexOf(scoreOpenMarker, StringComparison.Ordinal);
                    var ScoreStart = scoreOpen + scoreOpenMarker.Length;
                    var ScoreEnd = paragraph.LastIndexOf(scoreCloseMarker, StringComparison.Ordinal);
                    if (scoreOpen < 0 || ScoreEnd < ScoreStart)
                    {
                        throw new FormatException("题目格式不正确:无法解析分数");
                    }
                    // Invariant culture: the document always writes '.' as the decimal separator,
                    // whatever the server's regional settings are.
                    model.Score = Convert.ToDouble(paragraph.Substring(ScoreStart, ScoreEnd - ScoreStart),
                        System.Globalization.CultureInfo.InvariantCulture);
                    continue;
                }

                // Choice questions mark their answers with "选项:", every other type with "填空:".
                var Start = (model.Type == 2 || model.Type == 3) ? "选项:" : "填空:";
                var markerPos = paragraph.IndexOf(Start, StringComparison.Ordinal);
                var answerStart = markerPos + Start.Length;
                var answerEnd = paragraph.IndexOf("</p>", StringComparison.Ordinal);
                if (markerPos < 0 || answerEnd < answerStart)
                {
                    // Not an answer paragraph (e.g. a blank line); skip it rather than
                    // storing an arbitrary slice of markup as an answer.
                    continue;
                }

                var answerstr = paragraph.Substring(answerStart, answerEnd - answerStart);
                var answerModel = new xiding.Models.xiding_task_answerModel();
                if (answerstr.Contains(rightKeyMarker))
                {
                    // A marked right answer makes the whole question objective.
                    model.IsObjective = true;
                    answerModel.IsRightKey = true;
                    answerModel.Title = answerstr.Replace(rightKeyMarker, string.Empty);
                }
                else
                {
                    answerModel.Title = answerstr;
                }
                answersList.Add(answerModel);
            }
            model.Answer = answersList;
            return model;
        }
        #endregion
        #endregion

 

  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值