日期匹配
\d{4}[-\/\. ,]\d{1,2}[-\/\. ,]\d{1,2}
\d{1,2}[-\/\. ,]\d{1,2}[-\/\. ,](\d{4}|\d{2})
\d{1,2}[-\/\. ,]\d{2,4}
\d{8}
(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec|January|February|March|April|June|July|August|September|October|November|December|JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|SEPT|OCT|NOV|DEC|JANUARY|FEBRUARY|MARCH|APRIL|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER|jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec|january|february|march|april|june|july|august|september|october|november|december)[-\/\. ,]?\d{1,2}[-\/\. ,]{0,2}(\d{4}|\d{2})
\d{0,2}(st|nd|rd|th)?[-\/\. ,]?(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec|January|February|March|April|June|July|August|September|October|November|December|JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|SEPT|OCT|NOV|DEC|JANUARY|FEBRUARY|MARCH|APRIL|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER|jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec|january|february|march|april|june|july|august|september|october|november|december)[-\/\. ,]{0,2}(\d{4}|\d{2})
(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec|January|February|March|April|June|July|August|September|October|November|December|JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|SEPT|OCT|NOV|DEC|JANUARY|FEBRUARY|MARCH|APRIL|JUNE|JULY|AUGUST|SEPTEMBER|OCTOBER|NOVEMBER|DECEMBER|jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec|january|february|march|april|june|july|august|september|october|november|december)[-\/\. ,]{0,2}\d{0,2}(st|nd|rd|th)?[-\/\. ,]{0,2}(\d{4}|\d{2})
import re

# English month names, abbreviated and in full. "May" is listed only among the
# abbreviations because its short and long forms are identical.
_MONTH_ABBREVIATIONS = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec"
_MONTH_FULL_NAMES = (
    "January|February|March|April|June|July|August|"
    "September|October|November|December"
)

# Alternation of every accepted month spelling: each name list in Title,
# lower and UPPER case. Matching stays case-sensitive, so other mixed casings
# (e.g. "fEB") are intentionally not recognised.
month_pattern = "|".join(
    cased
    for names in (_MONTH_ABBREVIATIONS, _MONTH_FULL_NAMES)
    for cased in (names, names.lower(), names.upper())
)

# Month name, then day, then year -- e.g. "Feb 20,2023".
month_pattern_1 = rf"({month_pattern})[-/. ,]?\d{{1,2}}[-/. ,]{{0,2}}(\d{{4}}|\d{{2}})"
# Optional day (with optional ordinal suffix), month name, year -- e.g.
# "13th February, 2023" or "Dec,2022".
month_pattern_2 = rf"\d{{0,2}}(st|nd|rd|th)?[-/. ,]?({month_pattern})[-/. ,]{{0,2}}(\d{{4}}|\d{{2}})"
# Month name, optional day (with optional ordinal suffix), year -- e.g.
# "Jan 20th,2023" or "February 2023".
month_pattern_3 = rf"({month_pattern})[-/. ,]{{0,2}}\d{{0,2}}(st|nd|rd|th)?[-/. ,]{{0,2}}(\d{{4}}|\d{{2}})"

# Compiled patterns in priority order: purely numeric layouts first, then the
# month-name layouts.
datetime_pattern_list = [
    # Year first: 2001-08-24
    re.compile(r"\d{4}[-/. ,]\d{1,2}[-/. ,]\d{1,2}"),
    # Year last: 24/08/2001 or 24/08/01. The four-digit alternative must come
    # first; regex alternation is ordered, so "(\d{2}|\d{4})" would stop after
    # two digits and truncate "2001" to "20".
    re.compile(r"\d{1,2}[-/. ,]\d{1,2}[-/. ,](\d{4}|\d{2})"),
    # Two components only: 2/2023
    re.compile(r"\d{1,2}[-/. ,]\d{2,4}"),
    # Eight consecutive digits: 20230216
    re.compile(r"\d{8}"),
    re.compile(month_pattern_1),
    re.compile(month_pattern_2),
    re.compile(month_pattern_3),
]
def match_datetime(content: str) -> str:
    """Return the first date-like substring found in ``content``.

    The compiled patterns in ``datetime_pattern_list`` are tried in list
    order; the first pattern that matches anywhere in ``content`` wins, so
    earlier patterns take priority over later ones.

    Args:
        content: Text to scan for a date.

    Returns:
        The matched substring, or ``''`` when no pattern matches.
    """
    for pattern in datetime_pattern_list:
        matched_result = pattern.search(content)
        if matched_result:
            # Return the matched text, not the Match object, so the result
            # agrees with the declared ``str`` return type and the ``''``
            # fallback below.
            return matched_result.group()
    return ''
日期校验
import pandas as pd
def datetime_format(content: str) -> str:
    """Validate a date string by parsing it with ``pd.to_datetime``.

    Args:
        content: Candidate date text, e.g. ``'01-feb-23'``.

    Returns:
        ``str()`` of the parsed value when parsing succeeds, or ``''`` when
        parsing raises or yields no usable timestamp (``None`` / ``NaT``).
    """
    try:
        parsed = pd.to_datetime(content, errors='raise')
    except Exception:
        # Deliberate best-effort: any parsing failure means "not a valid date".
        return ''
    formatted = str(parsed)
    # Explicit check instead of ``assert`` so the validation still runs when
    # Python is started with -O; also reject a ``None`` result, which would
    # otherwise be returned as the string 'None'.
    if parsed is None or formatted == 'NaT':
        return ''
    return formatted
# Example call with one of the sample strings; the return value is discarded here.
datetime_format('01-feb-23')
测试用例
24/08/01
2001-08-24
24.08.2001
24/08/2001
2001-08-24
24/08/01
24-08-01
24-08-01
2001-08-24
24/08/01
08-24-01
24/08/2001
23 Nov 2022
14Dec2022
July 2022
18/8/2022
30 Aug 2022
30Dec 2022
25 Oct 2022
17 Nov 2022
01/16/2022
13 Jul2022
15 Dec-2022
06 Dec-2022
January 2023
Date17/Feb/20231227PM
the new tariff effective 21/12/2022.
ELECTRICITY BILL FOR:February 2023
DUE DATE:2/16/2023
Feb.2023
Dec,2022
01-Feb-22
13th February, 2023
NOV-2023
12:14, Oct 31, 2022
2/2023
Thu Feb 09 2023 16:52:29 GMT+010
Jan 20th,2023
Date 01/Feb/2023 11-52 AM
Date of Issue: Thu, Jan 26, 2023 11:43:39
29-11-022
DateTime :Fri,Mar 11th,2022 2
Jul 21, 2021, 8:15 PM
Feb 19 20
February 17, 20:13
Feb 03, 2023, 00:24
DEC2022
0.11 itlPro.01FEB 2023
17/02/2311:59:50
Feb18,2023.09:57
Sun Feb 19 20
February 7, 07:46
14:38, Jan 01, 2023
February 21, 07:47
February 12,2023,08:44
Mon Feb 20 20
Date:Feb 20,2023,22:38
15:11, Feb 20, 2023
15:11, Feb 20, 2023
Feb 20,2023.12:45 PM
Wed Feb 15 2023 07:36:27 GMT+010
Feb 21,2023,16:24
14-Aug-2020
01-feb-23