检测 Word 文档是否加密(同样适用于 PowerPoint、Excel)

Maven Dependencies

        <!-- Tika core: the Tika facade, type detection and TikaInputStream. -->
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-core</artifactId>
            <version>1.19.1</version>
        </dependency>

        <!-- Tika parser bundle: AutoDetectParser and the format-specific parsers. -->
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-parsers</artifactId>
            <version>1.19.1</version>
        </dependency>

        <!-- No-op SLF4J binding; presumably added to silence logging output. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-nop</artifactId>
            <version>1.7.2</version>
        </dependency>

        <!-- Presumably an optional Tika runtime dependency (JPEG2000 image support); confirm before removing. -->
        <!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
        <dependency>
            <groupId>com.github.jai-imageio</groupId>
            <artifactId>jai-imageio-jpeg2000</artifactId>
            <version>1.3.0</version>
        </dependency>

        <!-- Presumably an optional Tika runtime dependency (SQLite file parsing); confirm before removing. -->
        <!-- https://mvnrepository.com/artifact/org.xerial/sqlite-jdbc -->
        <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.25.2</version>
        </dependency>

Code

import org.apache.tika.Tika;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.io.*;
import java.util.Arrays;
import java.util.List;

/**
 * Utility for checking whether an office document (Word, PowerPoint, Excel)
 * is password-protected, based on Apache Tika type detection and parsing.
 */
public class DocCheckUtil {
    /**
     * Type detected for password-protected docx/pptx/xlsx files saved by Microsoft Office.
     */
    public static final String MIME_X_TIKA_OOXML_PROTECTED = "application/x-tika-ooxml-protected";
    /**
     * Type detected for password-protected docx/pptx/xlsx files saved by WPS.
     */
    public static final String MIME_X_TIKA_MSOFFICE = "application/x-tika-msoffice";

    /**
     * Office/WPS doc.
     */
    public static final String MIME_MSWORD = "application/msword";
    /**
     * Office/WPS ppt.
     */
    public static final String MIME_MSPOWERPOINT = "application/vnd.ms-powerpoint";
    /**
     * Office/WPS xls.
     */
    public static final String MIME_MSEXCEL = "application/vnd.ms-excel";
    /**
     * Office/WPS docx.
     */
    public static final String MIME_OFFICE_DOCUMENT_WORD = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    /**
     * Office/WPS pptx.
     */
    public static final String MIME_OFFICE_DOCUMENT_POWERPOINT = "application/vnd.openxmlformats-officedocument.presentationml.presentation";
    /**
     * Office/WPS xlsx.
     */
    public static final String MIME_OFFICE_DOCUMENT_SHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    /**
     * Legacy binary formats (doc, xls, ppt). These cannot be classified by detection
     * alone and have to be parsed to find out whether they are encrypted.
     * The list is read-only so that callers cannot alter the shared constant.
     */
    public static final List<String> MS_OLD_MIMETYPES =
            java.util.Collections.unmodifiableList(Arrays.asList(MIME_MSWORD, MIME_MSEXCEL, MIME_MSPOWERPOINT));


    /**
     * Checks whether a document (Word, PowerPoint, Excel) is password-protected.
     *
     * @param file the document to inspect
     * @return {@code true} if the document is password-protected; {@code false} otherwise,
     *         including when the detected type is not one this class knows about
     * @throws IOException  if the file cannot be read ({@link FileNotFoundException} when it
     *                      does not exist or is a directory)
     * @throws SAXException if the parser's content handler reports an error
     */
    public static boolean checkPwdProtected(File file) throws IOException, SAXException {
        try (TikaInputStream stream = TikaInputStream.get(new FileInputStream(file), new TemporaryResources())) {
            String detected = new Tika().detect(stream);
            if (MIME_X_TIKA_OOXML_PROTECTED.equals(detected) || MIME_X_TIKA_MSOFFICE.equals(detected)) {
                return true;
            }
            if (MS_OLD_MIMETYPES.contains(detected)) {
                // Detection alone does not reveal encryption for doc/xls/ppt; parse the same stream.
                return checkMsmime(stream);
            }
            return false;
        }
    }

    /**
     * Checks whether a doc/ppt/xls stream is password-protected by attempting to parse it.
     *
     * <p>Only a parse failure caused by encryption counts as "protected". Any other
     * {@link TikaException} (for example a corrupt file) is logged to standard error and
     * reported as not protected, so that broken files are not mislabelled as encrypted.
     *
     * @param stream the document content; it is not closed by this method
     * @return {@code true} if parsing failed because the document is encrypted
     * @throws IOException  if the stream cannot be read
     * @throws SAXException if the content handler reports an error
     */
    private static boolean checkMsmime(InputStream stream) throws IOException, SAXException {
        try {
            // The extracted content is irrelevant here, hence the no-op handler.
            new AutoDetectParser().parse(stream, new DefaultHandler(), new Metadata(), new ParseContext());
            return false;
        } catch (TikaException e) {
            if (isEncryptionFailure(e)) {
                return true;
            }
            System.err.println("Document could not be parsed (not an encryption failure): " + e);
            return false;
        }
    }

    /**
     * Tells whether the given failure, or any exception in its cause chain, signals an
     * encrypted document. Tika's own exception is thrown for protected doc files; POI's
     * exception shows up as a cause for protected Office documents.
     */
    private static boolean isEncryptionFailure(Throwable failure) {
        for (Throwable current = failure; current != null; current = current.getCause()) {
            if (current instanceof EncryptedDocumentException
                    || current instanceof org.apache.poi.EncryptedDocumentException) {
                return true;
            }
        }
        return false;
    }


    /**
     * Manual smoke test: checks one sample file directly, then every matching file in
     * the sample directory used by {@link #check1()}.
     */
    public static void main(String[] args) throws Exception {
        // Close the sample stream once the check is done instead of leaking it.
        try (InputStream sample = new FileInputStream("C:\\Users\\yf5\\Desktop\\fm\\pmbroken.docx")) {
            System.out.println(checkMsmime(sample));
        }
        check1();
    }

    /**
     * Runs {@link #checkPwdProtected(File)} on every regular file in the sample directory
     * whose name contains "doc", "xls", "pdf" or "ppt", printing one result line per file.
     * Does nothing except print a notice when the directory cannot be listed.
     */
    public static void check1() throws Exception {
        File directory = new File("C:\\Users\\yf5\\Desktop\\doccheck2");
        // Directories are skipped: opening one as a file would fail and abort the whole run.
        File[] candidates = directory.listFiles(
                (FileFilter) candidate -> candidate.isFile() && hasDocumentLikeName(candidate.getName()));

        // listFiles returns null when the path is not a directory or cannot be read.
        if (candidates == null) {
            System.err.println("Cannot list directory: " + directory);
            return;
        }

        for (File candidate : candidates) {
            System.out.println(candidate.getName() + ":::::" + checkPwdProtected(candidate));
        }
    }

    /**
     * Returns whether the file name contains one of the document markers used by the sample run.
     */
    private static boolean hasDocumentLikeName(String fileName) {
        return fileName.contains("doc")
                || fileName.contains("xls")
                || fileName.contains("pdf")
                || fileName.contains("ppt");
    }

}

 

完整pom.xml(包含打包插件)

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>doc.test</groupId>
    <artifactId>doccheck</artifactId>
    <version>1.0.0</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Test-only dependency. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>

        <!-- Tika core: the Tika facade, type detection and TikaInputStream. -->
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-core</artifactId>
            <version>1.19.1</version>
        </dependency>

        <!-- Tika parser bundle: AutoDetectParser and the format-specific parsers. -->
        <dependency>
            <groupId>org.apache.tika</groupId>
            <artifactId>tika-parsers</artifactId>
            <version>1.19.1</version>
        </dependency>

        <!-- No-op SLF4J binding; presumably added to silence logging output. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-nop</artifactId>
            <version>1.7.2</version>
        </dependency>

        <!-- Presumably an optional Tika runtime dependency (JPEG2000 image support); confirm before removing. -->
        <!-- https://mvnrepository.com/artifact/com.github.jai-imageio/jai-imageio-jpeg2000 -->
        <dependency>
            <groupId>com.github.jai-imageio</groupId>
            <artifactId>jai-imageio-jpeg2000</artifactId>
            <version>1.3.0</version>
        </dependency>

        <!-- Presumably an optional Tika runtime dependency (SQLite file parsing); confirm before removing. -->
        <!-- https://mvnrepository.com/artifact/org.xerial/sqlite-jdbc -->
        <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.25.2</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Packages the project and all of its dependencies into a single shaded jar. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <!-- Set mainClass here to make the shaded jar directly runnable. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!--<mainClass></mainClass>-->
                                </transformer>
                                <!-- Merge META-INF/services files instead of letting one jar's copy
                                     overwrite another's. Tika discovers its parsers and detectors
                                     through these files, so losing entries silently disables them
                                     in the shaded jar. -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <!-- Drop signature files of signed dependencies; they no longer
                                         match the repackaged jar and would fail verification. -->
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
            <plugins>
                <plugin>
                    <artifactId>maven-clean-plugin</artifactId>
                    <version>3.0.0</version>
                </plugin>
                <!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
                <plugin>
                    <artifactId>maven-resources-plugin</artifactId>
                    <version>3.0.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.7.0</version>
                </plugin>
                <plugin>
                    <artifactId>maven-surefire-plugin</artifactId>
                    <version>2.20.1</version>
                </plugin>
                <plugin>
                    <artifactId>maven-jar-plugin</artifactId>
                    <version>3.0.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-install-plugin</artifactId>
                    <version>2.5.2</version>
                </plugin>
                <plugin>
                    <artifactId>maven-deploy-plugin</artifactId>
                    <version>2.8.2</version>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>


</project>

 

转载于:https://my.oschina.net/tita/blog/2986556

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值