序言
Android手机可以实现滚动截图。本文要采集某某买菜的产品数据,主要是通过分析产品数据来挑选精品商品,方便选品。
可以先用Android手机的滚动截图功能截取长图,再用神奇图片分割软件把长图分割成若干小图,然后用图片识别(OCR)技术读取图片上的文字,最后把截图和文字通过接口上传到云端。
原创代码如下:
# -*- coding: utf-8 -*-
# Upload split product screenshots together with their OCR text.
#
# Walks the "import" folder under the project root. For every directory whose
# path contains "0000" (and does not contain "finish") each ".jpg" file is
# uploaded with up_file, read with Tesseract OCR, and the resulting record is
# submitted through BaseApi.uploadGoodsData. The directory is then renamed
# with a "_finish" suffix so that a later run skips it.
import os
from typing import Dict, Iterable, Optional, Tuple

import pytesseract
from PIL import Image

from base.base_api import BaseApi
from base.base_root import BaseRoot
from base.req_up_data import up_file

curpath = BaseRoot.root_path
curpath = curpath.replace("\\", '/')
folder = "import"
# NOTE(review): plain concatenation, so this presumably relies on
# BaseRoot.root_path ending with a path separator -- confirm.
pic_dir = curpath + f'{folder}/'

# Prefix put in front of the path returned by up_file (placeholder host).
OSS_BASE_URL = "https://你的oss地址.aliyuncs.com"

# A screenshot name must provide at least this many "-"-separated fields:
# son name, son code, warehouse name, warehouse code, category, year, month,
# and a last field that starts with the two-character day.
_MIN_NAME_FIELDS = 8


def _parse_file_name(file_name: str) -> Optional[Dict[str, str]]:
    """Build the base upload record from a "-"-separated screenshot name.

    Returns ``None`` when the name has fewer than ``_MIN_NAME_FIELDS`` fields.
    Key insertion order is kept stable because the record is printed as-is.
    """
    parts = file_name.split("-")
    if len(parts) < _MIN_NAME_FIELDS:
        return None
    record = {
        'warehouse_son_name': parts[0],
        'warehouse_son_code': parts[1],
        'warehouse_name': parts[2],
        'warehouse_code': parts[3],
        'category': parts[4],
        'near_buy': '',
        'day_buy': '',
        'year': parts[5],
        'month': parts[6],
        # The last field still carries the extension; keep only the day.
        'day': parts[7][:2],
        'goods_name': "",
        'goods_price': "0",
    }
    record['now_day'] = f"{record['year']}-{record['month']}-{record['day']}"
    return record


def _extract_goods_fields(lines: Iterable[str]) -> Tuple[str, str, str]:
    """Classify OCR text lines into ``(near_buy, goods_price, goods_name)``.

    * a line containing "附近" becomes ``near_buy`` (the last such line wins);
    * lines containing "无理由退换" or "贝勾" are dropped;
    * a line containing "优晨价" sets the price to "0.1";
    * every other line is concatenated, spaces removed, into the goods name.

    NOTE(review): "贝勾" and "优晨价" look like OCR misreads of real labels,
    and "0.1" looks like a placeholder price -- confirm with the data owner.
    """
    near_buy = ''
    goods_price = "0"
    name_parts = []
    for line in lines:
        if "附近" in line:
            near_buy = line
        elif "无理由退换" in line or "贝勾" in line:
            continue
        elif "优晨价" in line:
            goods_price = "0.1"
        else:
            name_parts.append(line)
    goods_name = "".join(name_parts).replace(' ', '')
    return near_buy, goods_price, goods_name


def _process_image(root: str, file_name: str) -> None:
    """Upload one screenshot, OCR it and submit the resulting record."""
    record = _parse_file_name(file_name)
    resolve_pic = f"{root}/{file_name}"
    if record is None:
        # One badly named file should not abort the whole batch.
        print(f"skip {resolve_pic}: expected at least "
              f"{_MIN_NAME_FIELDS} '-'-separated fields in the file name")
        return

    file_type = resolve_pic.split('.')[-1]
    save_file_url = up_file(resolve_pic, file_type)
    record['img_url'] = f"{OSS_BASE_URL}{save_file_url}"

    # Context manager releases the file handle as soon as OCR is done.
    with Image.open(resolve_pic) as image:
        content = pytesseract.image_to_string(image, lang='chi_sim+eng')

    near_buy, goods_price, goods_name = _extract_goods_fields(
        str(content).split("\n")
    )
    record['near_buy'] = near_buy
    record['goods_price'] = goods_price
    record['goods_name'] = goods_name

    print(record)
    response_text = BaseApi.uploadGoodsData(record)
    print(f"------打印第{resolve_pic}提交返回开始------")
    print(response_text)
    print(f"------打印第{resolve_pic}提交返回结束------")


def traverse_dir(path):
    """Process every pending screenshot directory below *path*.

    A directory is pending when its path contains "0000" and does not contain
    "finish". Each file with a ".jpg" extension in it is handled by
    ``_process_image``; afterwards the directory is renamed to
    ``<dir>_finish``.

    Args:
        path: Root directory to walk.

    Raises:
        OSError: If the final rename fails.
    """
    for root, dirs, files in os.walk(path):
        if "0000" not in root or "finish" in root:
            continue

        for file_name in files:
            # Compare the real extension rather than searching for "jpg"
            # anywhere in the name.
            if os.path.splitext(file_name)[1] == ".jpg":
                _process_image(root, file_name)

        os.rename(root, f"{root}_finish")
        # The old path no longer exists, so its children cannot be reached
        # through it; prune them instead of letting os.walk fail silently.
        dirs.clear()


dir_path = pic_dir
traverse_dir(dir_path)