get deflate stream 的代码

最新推荐文章于 2024-11-12 22:46:48 发布

lukesunch

最新推荐文章于 2024-11-12 22:46:48 发布

阅读量550

点赞数

文章标签： stream exception byte regex 正则表达式 string

本文链接：https://blog.csdn.net/lukesunch/article/details/4885655

版权

最近在忙别的事情，没空关注 PDF 了。既然有人需要，我找了找，大概就是下面这段代码。

// Regex that locates a FlateDecode (deflate-compressed) PDF stream object: a "<< ... >>"
// dictionary that mentions /FlateDecode, the "stream" keyword, a line break, the payload
// (capture group 1) and finally "endstream".
// Hand-written by the author and not guaranteed to be general: "[^>]*" cannot cross a
// nested "<< ... >>" dictionary, so such stream dictionaries will not be recognised.
// Singleline makes "." match line breaks, which the payload may contain.
Regex streamRegex = new Regex(@"<<[^>]*/FlateDecode[^>]*>>\s*stream\s*\n(.*?)\s*endstream", RegexOptions.Singleline);

……（此处省略中间的代码）

               // Locate the next FlateDecode stream in pdfContent, inflate its payload and
               // store the decoded text back into pdfContent (also dumped to a debug file).
               // Any failure while copying, inflating or writing is rethrown as an
               // Exception("error inflate string") carrying the original as InnerException.
               match = streamRegex.Match(pdfContent);
               if (match.Success)
               {
                   pdfContent = match.Groups[1].Value;
                   // The first 2 bytes of the payload are skipped: presumably the zlib header
                   // (RFC 1950), which DeflateStream (raw deflate) does not accept.
                   // NOTE(review): Groups[1].Index is a character offset into pdfContent but is
                   // used as a byte offset into pdfBuf; that only lines up if pdfContent was
                   // decoded from pdfBuf with a single-byte encoding - confirm against the caller.
                   documentContentStart += (match.Groups[1].Index + 2);
                   strLen = match.Groups[1].Length;

                   int compressedLength = match.Groups[1].Length - 2;
                   if (compressedLength < 0)
                   {
                       // Payload too short to even hold the 2 skipped bytes: nothing to inflate.
                       // (Previously execution fell through and failed on a negative array size.)
                       strContent = "";
                   }
                   else
                   {
                       byte[] bufTemp = new byte[compressedLength];
                       try
                       {
                           // Element-wise copy kept on purpose: the declared types of pdfBuf and
                           // documentContentStart are not visible from this fragment.
                           for (int i = 0; i < compressedLength; i++)
                           {
                               bufTemp[i] = pdfBuf[documentContentStart + i];
                           }

                           using (MemoryStream ms = new MemoryStream(bufTemp))
                           using (DeflateStream deStream = new DeflateStream(ms, CompressionMode.Decompress))
                           using (MemoryStream inflated = new MemoryStream())
                           {
                               // Read until end of stream: a single Read call may return fewer
                               // bytes than requested, and the output may exceed one chunk.
                               byte[] chunk = new byte[16 * 1024];
                               int bytesRead;
                               while ((bytesRead = deStream.Read(chunk, 0, chunk.Length)) > 0)
                               {
                                   inflated.Write(chunk, 0, bytesRead);
                               }

                               byte[] decompressedBuffer = inflated.ToArray();
                               // Encoding.Default is platform dependent; kept to preserve behavior.
                               pdfContent = Encoding.Default.GetString(decompressedBuffer, 0, decompressedBuffer.Length);
                           }

                           // Debug dump of the inflated text (hard-coded path kept from the original).
                           File.WriteAllText("c:/tmp/pdftxt.txt", pdfContent);
                       }
                       catch (Exception ex)
                       {
                           throw new Exception("error inflate string", ex);
                       }
                   }
               }

            }