【工作日志】日本知识产权网站:外观采集的详情页链接拼接算法

"""
    https://www.j-platpat.inpit.go.jp/cache/gazette_work/DESIGN_BUL/2023/226/DG/0001758001/0001758601/0001758641/0001758642/C1BFCFCC0220039156C081343F344EB1375C6301CE9AEC61D9B265CF156AE942/0001758642000001.jpg
    https://www.j-platpat.inpit.go.jp/cache/original/DESIGN_BUL/2023/226/DG/0001758001/0001758601/0001758641/0001758642/C1BFCFCC0220039156C081343F344EB1375C6301CE9AEC61D9B265CF156AE942/0001758642.xml
    https://www.j-platpat.inpit.go.jp/cache/gazette_work/DESIGN_BUL
    /2024/001/DG/0001760001/0001760501/0001760591/0001760600
    /2A4C11E772AE7D40ED45EE3EA34D0397CC825349764F4549889AE3CBBEBBD427/0001760600.pdf

    VOLUME_NUM:2023226  切片 4+3
    BUL_TYPE:DG
    DOCU_NUM:1758642   前三补零 0001758001  后三变001
    DOCU_NUM:1760600   前三补零 0001760501  后二变01 若结尾是00,倒数第三位减一(即所在百位区间 xx01~xx00 的起始号)
    DOCU_NUM:1760600   前三补零 0001760591  后一变1  若结尾是0,倒数第二位减一;若结尾是00,倒数第三位减一且倒数第二位是9(即所在十位区间 x1~x0 的起始号)
    DOCU_NUM:1758642   前三补零 0001758642  其他不变
    HASH_VALUE:C1BFCFCC0220039156C081343F344EB1375C6301CE9AEC61D9B265CF156AE942
    DOCU_NUM:1758642   前三补零 0001758642  其他不变 .xml
    DOCU_NUM:1758642   前三补零 0001758642  其他不变.jpg
    DOCU_NUM:1758642   前三补零 0001758642  其他不变.pdf
    """
    # Build the J-PlatPat design-gazette URLs for one document.
    #
    # Inputs (all strings, defined by the caller):
    #   VOLUME_NUM  e.g. '2023226' -> split 4 + rest -> '2023/226'
    #   BUL_TYPE    e.g. 'DG'
    #   DOCU_NUM    e.g. '1758642'
    #   HASH_VALUE  hex digest used as the last directory level
    #
    # Outputs: b_DOCU_NUM_001 / b_DOCU_NUM_01 / b_DOCU_NUM_1 / b_DOCU_NUM,
    # url_mid, html_url, jpg_url, pdf_url.

    # Numbers shorter than 10 characters get a fixed '000' prefix
    # (7 digits -> 10); longer ones are used unchanged.
    docu_pad = '000' if len(DOCU_NUM) < 10 else ''
    b_DOCU_NUM = docu_pad + DOCU_NUM
    docu_width = len(b_DOCU_NUM)

    # The three parent directories are the first document number of the
    # block of 1000 / 100 / 10 that contains DOCU_NUM. Blocks run xx1..xx0
    # (e.g. 1760600 lives in 1760001 / 1760501 / 1760591), so the block start
    # is ((n - 1) // size) * size + 1.
    # NOTE(review): the block rule is inferred from the documented examples;
    # it matches them all and also covers numbers ending in '100' or '000',
    # where digit-by-digit string edits lose a zero or emit '-1'.
    # int() raises ValueError if DOCU_NUM is not purely numeric.
    docu_prev = int(DOCU_NUM) - 1
    b_DOCU_NUM_001 = str(docu_prev // 1000 * 1000 + 1).zfill(docu_width)
    b_DOCU_NUM_01 = str(docu_prev // 100 * 100 + 1).zfill(docu_width)
    b_DOCU_NUM_1 = str(docu_prev // 10 * 10 + 1).zfill(docu_width)

    # year / issue / type / block-of-1000 / block-of-100 / block-of-10 / doc / hash
    url_mid = '%s/%s/%s/%s/%s/%s/%s/%s' % (
        VOLUME_NUM[0:4], VOLUME_NUM[4:], BUL_TYPE, b_DOCU_NUM_001, b_DOCU_NUM_01, b_DOCU_NUM_1, b_DOCU_NUM,
        HASH_VALUE)

    html_url = 'https://www.j-platpat.inpit.go.jp/cache/original/DESIGN_BUL/%s/%s.xml' % (url_mid, b_DOCU_NUM)
    # jpg_url is only the image directory; no file name is appended here
    # (the example image name is '<b_DOCU_NUM>000001.jpg').
    jpg_url = 'https://www.j-platpat.inpit.go.jp/cache/gazette_work/DESIGN_BUL/%s' % url_mid
    pdf_url = 'https://www.j-platpat.inpit.go.jp/cache/gazette_work/DESIGN_BUL/%s/%s.pdf' % (url_mid, b_DOCU_NUM)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值