# gist: https://gist.github.com/genesislive/5097326
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# http://www.pythonchallenge.com/pc/def/ocr.html
import re
# the content of rare_characters.txt is from the
# source of ocr.html
def extract_letters(text):
    """Return the ASCII letters of *text*, in original order, as one string.

    Every character that is not in ``A-Z`` or ``a-z`` (digits, punctuation,
    whitespace, newlines, ...) is dropped.  An input without letters yields
    the empty string.
    """
    return ''.join(re.findall(r'[A-Za-z]', text))


def main():
    """Read ``rare_characters.txt`` and print the letters it contains."""
    # The read must be indented under ``with`` so the file is closed as soon
    # as its content has been loaded.
    with open('rare_characters.txt', 'r') as source_file:
        content = source_file.read()
    # A parenthesised single-argument print produces the same output on
    # Python 2 and Python 3.
    print(extract_letters(content))


if __name__ == '__main__':
    main()