前言
工作中遇到一个需求:ChatGPT 聊天对话记录需要支持导出为 PDF、Word、Markdown(MD)三种格式,于是有了本篇文章。
实现思路
对话中的回答可能是 Markdown 格式,而 Markdown 文本无法直接转成 PDF 或 Word。因此这里先把对话内容拼接成 Markdown 文本:导出 MD 时直接输出;导出 PDF 或 Word 时,先把 Markdown 转成 HTML 代码,再由 HTML 转成 PDF 或 Word。
代码实现
需要的nuget包
- html转pdf:iTextSharp、itextsharp.xmlworker
- html转word:FreeSpire.Doc
- md转html:Markdig
创建HtmlHelper帮助类
/// <summary>
/// HTML helper: converts an HTML string into a PDF byte array (iTextSharp + XMLWorker)
/// or a Word (.docx) byte array (Spire.Doc).
/// </summary>
public class HtmlHelper
{
    /// <summary>
    /// Path of the font file used when rendering PDF text.
    /// NOTE(review): this field is static, so the value passed to the most recently
    /// constructed <see cref="HtmlHelper"/> is shared by every instance and by every
    /// <see cref="UnicodeFontFactory"/>. Constructing helpers with different font paths
    /// concurrently will race.
    /// </summary>
    private static string FontPath;

    /// <summary>
    /// Creates a helper and sets the font path used for PDF output.
    /// </summary>
    /// <param name="fontPath">Path of the font file handed to <c>BaseFont.CreateFont</c>.</param>
    public HtmlHelper(string fontPath)
    {
        FontPath = fontPath;
    }

    /// <summary>
    /// Creates a helper without touching the font path (sufficient for Word output).
    /// </summary>
    public HtmlHelper()
    {
    }

    /// <summary>
    /// Converts an HTML string into a PDF byte array.
    /// </summary>
    /// <param name="htmlText">HTML text to render.</param>
    /// <returns>The PDF bytes, or <c>null</c> when <paramref name="htmlText"/> is null or empty.</returns>
    /// <exception cref="UserFriendlyException">Thrown when any step of the conversion fails.</exception>
    public byte[] ConvertHtmlTextToPDF(string htmlText)
    {
        if (string.IsNullOrEmpty(htmlText))
        {
            return null;
        }

        try
        {
            // Both streams are disposed even when parsing throws.
            using (MemoryStream outputStream = new MemoryStream())
            using (MemoryStream msInput = new MemoryStream(Encoding.UTF8.GetBytes(htmlText)))
            {
                // Default constructor is used, so the page size is the library default.
                Document doc = new Document();
                PdfWriter writer = PdfWriter.GetInstance(doc, outputStream);

                // Open the document at 100% zoom, positioned at the top of the page.
                PdfDestination pdfDest = new PdfDestination(PdfDestination.XYZ, 0, doc.PageSize.Height, 1f);

                doc.Open();

                // Parse the HTML into the PDF, resolving every font through UnicodeFontFactory.
                XMLWorkerHelper.GetInstance().ParseXHtml(writer, doc, msInput, null, Encoding.UTF8, new UnicodeFontFactory());

                // Apply the destination above as the action executed when the PDF is opened.
                PdfAction action = PdfAction.GotoLocalPage(1, pdfDest, writer);
                writer.SetOpenAction(action);

                // Closing the document flushes the PDF into outputStream;
                // MemoryStream.ToArray() remains valid after the stream has been closed.
                doc.Close();
                return outputStream.ToArray();
            }
        }
        catch (Exception ex)
        {
            throw new UserFriendlyException("转PDF时异常,请联系管理员!", ex);
        }
    }

    /// <summary>
    /// Converts an HTML string into a Word (.docx) byte array.
    /// </summary>
    /// <param name="htmlText">HTML text to convert.</param>
    /// <returns>The .docx bytes, or <c>null</c> when <paramref name="htmlText"/> is null or empty.</returns>
    /// <exception cref="UserFriendlyException">Thrown when any step of the conversion fails.</exception>
    public byte[] ConvertHtmlTextToWord(string htmlText)
    {
        if (string.IsNullOrEmpty(htmlText))
        {
            return null;
        }

        try
        {
            // Both streams are disposed even when loading or saving throws.
            using (MemoryStream outputStream = new MemoryStream())
            using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(htmlText)))
            {
                Spire.Doc.Document doc = new Spire.Doc.Document();
                try
                {
                    doc.LoadFromStream(stream, FileFormat.Html, XHTMLValidationType.None);
                    doc.SaveToStream(outputStream, FileFormat.Docx);
                }
                finally
                {
                    // Release the document on the failure path as well.
                    doc.Close();
                }

                return outputStream.ToArray();
            }
        }
        catch (Exception ex)
        {
            throw new UserFriendlyException("转Word时异常,请联系管理员!", ex);
        }
    }

    /// <summary>
    /// Font factory that ignores the requested font name and always returns the font
    /// loaded from the configured font path, embedded with Identity-H encoding.
    /// </summary>
    public class UnicodeFontFactory : FontFactoryImp
    {
        /// <summary>
        /// Returns the configured font at the requested size, style and color.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when no font path has been configured.</exception>
        public override Font GetFont(string fontname, string encoding, bool embedded, float size, int style, BaseColor color,
            bool cached)
        {
            // Fail with a clear message instead of passing a null/empty path to BaseFont.CreateFont.
            if (string.IsNullOrEmpty(FontPath))
            {
                throw new InvalidOperationException(
                    "No font path configured: construct HtmlHelper with a font path before converting HTML to PDF.");
            }

            BaseFont baseFont = BaseFont.CreateFont(FontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
            return new Font(baseFont, size, style, color);
        }
    }
}
创建测试信息类
/// <summary>
/// A single chat message used as sample data for the export.
/// </summary>
public class Message
{
    /// <summary>
    /// Sender kind; the export code distinguishes "bot" from everything else.
    /// </summary>
    public string Type { get; set; }

    /// <summary>
    /// Text of the message.
    /// </summary>
    public string Context { get; set; }
}
通过控制器调用。
/// <summary>
/// Exports the current conversation as a Markdown, Word or PDF file download.
/// </summary>
public class FileController : Controller
{
    /// <summary>
    /// Footer label written into the generated text. The HTML post-processing searches
    /// for the same constant, so the text and the search pattern cannot drift apart.
    /// </summary>
    private const string EditedAtLabel = "编辑于";

    private readonly IHttpContextAccessor _httpContextAccessor;

    public FileController(IHttpContextAccessor httpContextAccessor)
    {
        _httpContextAccessor = httpContextAccessor;
    }

    /// <summary>
    /// Exports the whole conversation as a Markdown file.
    /// </summary>
    /// <returns>A UTF-8 encoded <c>text/markdown</c> file result.</returns>
    [HttpGet]
    [DisableAuditing]
    public async Task<IActionResult> GenerateAllMarkdown()
    {
        var markdown = await GenerateFile("md");
        byte[] markdownBytes = Encoding.UTF8.GetBytes(markdown);
        return File(markdownBytes, "text/markdown", BuildFileName("md"));
    }

    /// <summary>
    /// Exports the whole conversation as a Word (.docx) file.
    /// </summary>
    /// <returns>A .docx file result.</returns>
    [HttpGet]
    [DisableAuditing]
    public async Task<ActionResult> GenerateAllWord()
    {
        var html = await GenerateFile("html");
        HtmlHelper wordHelper = new HtmlHelper();
        var byteData = wordHelper.ConvertHtmlTextToWord(html);
        var contentType = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        return File(byteData, contentType, BuildFileName("docx"));
    }

    /// <summary>
    /// Exports the whole conversation as a PDF file.
    /// </summary>
    /// <returns>An <c>application/pdf</c> file result.</returns>
    /// <exception cref="UserFriendlyException">Thrown by the helper when the PDF conversion fails.</exception>
    [HttpGet]
    [DisableAuditing]
    public async Task<ActionResult> GeneratePDF()
    {
        var html = await GenerateFile("html");
        // "字体地址" is the article's placeholder for the directory that holds the font file.
        string fontPath = Path.Combine("字体地址", "Nsimsun.ttf");
        HtmlHelper pdfHelper = new HtmlHelper(fontPath);
        var byteData = pdfHelper.ConvertHtmlTextToPDF(html);
        return File(byteData, "application/pdf", BuildFileName("pdf"));
    }

    /// <summary>
    /// Builds the download file name: fixed prefix, current timestamp, given extension.
    /// </summary>
    /// <param name="extension">File extension without the leading dot.</param>
    private static string BuildFileName(string extension)
    {
        return $"文件名称-{Clock.Now:yyyyMMddHHmmss}.{extension}";
    }

    /// <summary>
    /// Builds the export text for the conversation.
    /// </summary>
    /// <param name="type">
    /// "md" returns the Markdown text; "html" returns a complete HTML document rendered
    /// from that Markdown. Any other value returns the text without emphasis markers.
    /// </param>
    /// <returns>The Markdown text or the HTML document.</returns>
    private async Task<string> GenerateFile(string type)
    {
        var title = "我是标题";
        var userName = "我是用户";
        var url = "我是链接";
        List<Message> currentMessage = new List<Message>()
        {
            new Message()
            {
                Type = "user",
                Context = "你是谁"
            },
            new Message()
            {
                Type = "bot",
                Context = "hello,我是chatgpt"
            }
            //...
        };
        // Placeholder: the real message lookup (and its awaits) goes here.

        // Emphasis markers wrapped around the speaker label, per output format.
        string emphasisOpen = "";
        string emphasisClose = "";
        switch (type)
        {
            case "html":
                emphasisOpen = "<strong>";
                emphasisClose = "</strong>";
                break;
            case "md":
                emphasisOpen = "**";
                emphasisClose = "**";
                break;
        }

        var markdown = new StringBuilder();

        // Title line.
        markdown.Append($"# {title} \n");

        foreach (var item in currentMessage)
        {
            var speakerLabel = item.Type == "bot" ? "AI回答:" : "用户提问:";
            markdown.Append($"{emphasisOpen} {speakerLabel}{emphasisClose} \n");
            markdown.Append($"{item.Context} \n");
            markdown.Append("\n");
        }

        // Footer, written as a Markdown blockquote.
        markdown.Append($"> {EditedAtLabel} {DateTime.Now}\n ");
        markdown.Append($">作者:{userName} \n ");
        markdown.Append($">链接:{url}\n ");
        markdown.Append(">来源:我是来源");

        var text = markdown.ToString();
        return type == "html" ? ConvertMarkdownToHtml(text, title) : text;
    }

    /// <summary>
    /// Renders the Markdown text to HTML, flattens paragraphs and blockquotes into
    /// <c>&lt;br /&gt;</c>-separated lines, and wraps the result in a full HTML document.
    /// </summary>
    /// <param name="markdown">Markdown text produced by <c>GenerateFile</c>.</param>
    /// <param name="title">Document title placed in the <c>&lt;title&gt;</c> element.</param>
    private static string ConvertMarkdownToHtml(string markdown, string title)
    {
        // The former Replace("/n", "<br />") was dropped: it never matched a newline and
        // would corrupt any content containing the literal "/n" (for example a URL path).
        // NOTE(review): the label replacements below also match the same text inside
        // message content; kept as in the original approach.
        var body = Markdown.ToHtml(markdown)
            .Replace("<p>", "")
            .Replace("</p>", "<br />")
            .Replace("AI回答:", "<br /> AI回答:")
            .Replace("用户提问:", "<br /> 用户提问:")
            .Replace("<blockquote>", "")
            .Replace("</blockquote>", "")
            .Replace(EditedAtLabel, "<br /><br /> " + EditedAtLabel)
            .Replace("作者:", "<br /> 作者:")
            .Replace("链接:", "<br /> 链接:")
            .Replace("来源:", "<br /> 来源:");

        // Encode the title so characters such as '<' or '&' cannot break the document markup.
        var safeTitle = System.Net.WebUtility.HtmlEncode(title);

        return $"\r\n<!DOCTYPE html>\r\n<html>\r\n<head>\r\n <meta charset=\"utf-8\" />\r\n <title>{safeTitle}</title>\r\n</head>\r\n<body>\r\n"
            + body
            + "\r\n</body>\r\n</html>";
    }
}