1. Detail step
Step 1: Replace HTML special characters with named character references
& ---> &amp;
< ---> &lt;
> ---> &gt;
Step 2: Replace all line breaks with <br>
result = re.sub("\r\n?|\n", "<br>", subject)
Step 3: Replace double <br> tags with </p><p>
result = re.sub(r"<br>\s*<br>", "</p><p>", subject)
Step 4: Wrap the entire string with <p>...</p>
result = "<p>" + subject + "</p>"

2. Python code
def plainTextToHtml(subject):
    """Convert plain text into a minimal HTML fragment.

    The conversion runs in four steps:

    1. Escape the HTML special characters ``&``, ``<`` and ``>`` as the
       named character references ``&amp;``, ``&lt;`` and ``&gt;``.
    2. Replace every line break (CRLF, lone CR, or LF) with ``<br>``.
    3. Replace each pair of ``<br>`` tags (optionally separated by
       whitespace) with a paragraph boundary ``</p><p>``.
    4. Wrap the whole string in ``<p>`` ... ``</p>``.

    Args:
        subject: The plain-text string to convert.

    Returns:
        The HTML fragment as a string.
    """
    import re

    # Step 1 (plain text searches). "&" must be escaped first; otherwise the
    # ampersands introduced by "&lt;" and "&gt;" would be escaped again.
    subject = subject.replace("&", "&amp;")
    subject = subject.replace("<", "&lt;")
    subject = subject.replace(">", "&gt;")

    # Step 2: "\r\n?" is tried before "\n" so a CRLF pair yields one <br>.
    subject = re.sub(r"\r\n?|\n", "<br>", subject)

    # Step 3: two consecutive breaks (a blank line, possibly containing only
    # whitespace) mark a paragraph boundary.
    subject = re.sub(r"<br>\s*<br>", "</p><p>", subject)

    # Step 4: wrap everything in a single enclosing paragraph.
    return "<p>" + subject + "</p>"
3. Test
In [2]: plainTextToHtml("Test.")
Out[2]: '<p>Test.</p>'

In [3]: plainTextToHtml("Test.\n")
Out[3]: '<p>Test.<br></p>'

In [4]: plainTextToHtml("Test.\n\n")
Out[4]: '<p>Test.</p><p></p>'

In [5]: plainTextToHtml("Test1.\nTest2.")
Out[5]: '<p>Test1.<br>Test2.</p>'

In [6]: plainTextToHtml("Test1.\n\nTest2.")
Out[6]: '<p>Test1.</p><p>Test2.</p>'

In [7]: plainTextToHtml("< AT&T >")
Out[7]: '<p>&lt; AT&amp;T &gt;</p>'