"""Scratch utilities for a Toutiao crawler.

Contains small file helpers, a threading experiment and two Selenium/Firefox
helpers that print an image URL or download a video.
"""
import os
import threading  # multithreading
import time
import urllib.request  # urllib.request.urlretrieve(url, dir) downloads url to dir
from time import ctime, sleep

# selenium is only used by the monitor_* functions. Guarding the import keeps
# the plain file helpers importable on machines without selenium; where it is
# installed the module namespace is exactly what it was before.
try:
    from selenium import webdriver  # browser automation
    from selenium.webdriver.firefox.options import Options  # headless Firefox
except ImportError:
    webdriver = None
    Options = None


def _require_selenium():
    """Raise ImportError if the optional selenium import above failed."""
    if webdriver is None or Options is None:
        raise ImportError("selenium is required for the monitor_* functions")


def _video_file_name(url):
    """Return the 4th '/'-separated piece of *url* plus '.mp4'.

    Returns None when *url* is empty/None or has fewer than four pieces.
    """
    if not url:
        return None
    pieces = url.split('/')
    if len(pieces) < 4:
        return None
    return pieces[3] + '.mp4'


def format_name(path=r'C:\Users\lyl8373\Desktop\PycharmProjects\Crawlers_HR\Toutiao_Crawler\hands'):
    """Rename every entry in *path* so only its first two dot-separated parts remain.

    For example ``clip.mp4.part`` becomes ``clip.mp4``. Entries whose name
    already has at most two parts are left untouched.

    Args:
        path: Directory whose entries are renamed in place.

    Raises:
        OSError: If *path* cannot be listed or a rename fails.
    """
    for old_name in os.listdir(path):
        new_name = '.'.join(old_name.split('.')[:2])  # format of the new name
        if new_name == old_name:
            continue
        # os.path.join instead of a hard-coded '\\' so this also works
        # outside Windows.
        os.renames(os.path.join(path, old_name), os.path.join(path, new_name))


# Notes on saving data:
#   1. numpy data: numpy.savetxt("result.txt", numpy_data)
#   2. list data:  file = open('data.txt', 'w'); file.write(str(list_data)); file.close()


def write_in_txt(fileName):
    """Overwrite *fileName* with the ``str()`` form of a fixed sample list."""
    sample = [6515, 51651, 51816, 1851321, 9618]
    with open(fileName, 'w') as fp:
        fp.write(str(sample))


def read_from_txt(fileName):
    """Return the entire text content of *fileName* as one string."""
    with open(fileName, "r") as fp:
        return fp.read()


def print_time(name, delay):
    """Print *name* with the current time, sleep *delay* seconds, then print again."""
    print(name, ctime(time.time()))
    time.sleep(delay)
    print(name, ctime(time.time()))


def os_path_func():
    """Print this file's path, the current working directory and this file's directory."""
    print(__file__)  # path of the current file
    print(os.getcwd())  # current working directory
    print(os.path.dirname(__file__))  # directory containing the current file


def test_threading():
    """Threading experiment: two workers that print a message and sleep, ten times each.

    NOTE(review): the source had lost its indentation; the ``__main__`` guard
    is kept inside this function because ``threads`` is only in scope here.
    As a consequence the threads are started only when this file is run as a
    script, not when the function is called from an importing module.
    """

    def music(func):
        for _ in range(10):
            print("I was listening to %s. %s" % (func, ctime()))
            sleep(1)

    def move(func):
        for _ in range(10):
            print("I was at the %s! %s" % (func, ctime()))
            sleep(5)

    # threading.Thread(target=function, args=(function arguments,)) defines a thread.
    threads = [
        threading.Thread(target=music, args=(u'爱情买卖',)),
        threading.Thread(target=move, args=(u'阿凡达',)),
    ]

    if __name__ == '__main__':
        for t in threads:
            t.start()  # start the thread
        for t in threads:
            t.join()  # wait until every thread has finished
        print("all over %s" % ctime())


def monitor_login_firefox():
    """Open a fixed Tmall item page in Firefox and print the main image's ``src``.

    The original also evaluated ``driver.page_source.xpath(...)``;
    ``page_source`` is a string and has no ``xpath`` method, so that statement
    always raised AttributeError and has been dropped.

    NOTE(review): ``find_element_by_xpath`` is kept as written; confirm it
    exists in the installed selenium version.

    Raises:
        ImportError: If selenium is not installed.
    """
    _require_selenium()
    driver = webdriver.Firefox()
    try:
        driver.get('https://detail.tmall.com/item.htm?spm=a230r.1.0.0.49eae18aya4W14&id=546355972923&ns=1')
        image = driver.find_element_by_xpath("//img[@id='J_ImgBooth']")
        print(image.get_attribute(name="src"))
    finally:
        # Close the browser even when loading or the lookup fails.
        driver.close()


def monitor_Firefox_headless(url, DirPath):
    """Load *url* in headless Firefox and download the page's video into *DirPath*.

    The ``src`` of the ``<video class="vjs-tech">`` element is downloaded when
    such an element is found; otherwise *url* itself is downloaded. The file is
    stored as ``DirPath + '/' + <4th '/'-separated piece of the URL> + '.mp4'``.
    Download problems are reported on stdout instead of being raised.

    NOTE(review): ``firefox_options=`` and ``find_element_by_xpath`` are kept
    as written; confirm they exist in the installed selenium version.

    Args:
        url: Page to open.
        DirPath: Directory the video file is written to.

    Raises:
        ImportError: If selenium is not installed.
    """
    _require_selenium()
    options = Options()
    options.add_argument('-headless')  # headless flag
    # With the driver on PATH its location can be omitted; otherwise pass an
    # absolute path.
    driver = webdriver.Firefox(firefox_options=options)
    try:
        driver.get(url)
        try:
            url = driver.find_element_by_xpath('//video[@class="vjs-tech"]').get_attribute(name='src')
        except Exception:
            # Deliberate best effort: without a usable <video> element the
            # page URL itself is kept as the download source.
            pass
    finally:
        # Close the browser even when loading the page fails.
        driver.close()

    file_name = _video_file_name(url)
    if file_name is None:
        print("down video: no file name could be derived from %r" % (url,))
        return

    try:
        urllib.request.urlretrieve(url, DirPath + '/' + file_name)
    except Exception:
        # Any download problem is reported, never raised, matching the
        # original best-effort behaviour.
        print("down video:" + file_name + " Failed!")
    else:
        print("down video:" + file_name + " successful!")


# Usage examples (kept disabled):
# monitor_login_firefox()
# monitor_Firefox_headless('https://www.toutiao.com/',r'c:/')
# print_time("a",3)
# fileName='hello.txt'
# write_in_txt(fileName)
# mm=read_from_txt(fileName)
# list=(mm.lstrip('[').rstrip(']').split(','))
# print(mm)
# for i in list:
#     print(int(i.strip())+50)
# format_name(r'C:\Users\lyl8373\Desktop\PycharmProjects\Crawlers_HR\Toutiao_Crawler\手指')
# Python Function
# Latest recommended article published on 2023-07-20 10:55:07