TypeError: can't concat str to bytes OR TypeError: cannot use a string pattern on a bytes-like object

These errors come from the differences between Python 2.x and Python 3.x in how str and bytes are handled when you try to write words into a file.

# Under Python 3.x


def combineWordsFromFeed(filename):
    """Write the filtered words of every feed entry to ``filename``, one entry per line.

    This is the Python 2.x form of the function, shown here being run under
    Python 3.x. It fails on the ``txt = ...`` line with
    "TypeError: can't concat str to bytes" (see the traceback below).

    Relies on names defined elsewhere: ``feedlist``, ``feedparser``,
    ``extractPlainText``, ``separatewords`` and ``mystopwords``.
    """
    #with open(filename, 'w',encoding='utf-8') as wfile:   #for python 3.x
    with open(filename, 'w') as wfile:    #for python 2.x
        for feed in feedlist:
            print("Parsing " + feed)
            fp = feedparser.parse(feed)
            
            for e in fp.entries:
                #txt = e.title+ extractPlainText(e.description)#for python 3.x
                # Under Python 3.x, .encode('utf8') yields bytes; the reported error
                # indicates extractPlainText() hands back str, so the "+" below raises.
                txt = e.title.encode('utf8') +extractPlainText(e.description.encode('utf8'))#for python 2.x
                words = separatewords(txt)
                
                #print(words)
                for word in words:
                    # Skip purely numeric tokens and stop words.
                    if word.isdigit() == False and word not in mystopwords:
                        wfile.write(word)
                        wfile.write(" ")
                # One output line per feed entry.
                wfile.write("\n")
    # Redundant: the with-statement has already closed the file at this point.
    wfile.close()
    return

output:

TypeError                                 Traceback (most recent call last)
<ipython-input-65-08ed4df9ed8a> in <module>()
----> 1 combineWordsFromFeed("wordcloudInput_FromFeeds.txt")

<ipython-input-64-a19447dbe8ed> in combineWordsFromFeed(filename)
      8             for e in fp.entries:
      9                 #txt = e.title+ extractPlainText(e.description)#for python 3.x
---> 10                 txt = e.title.encode('utf8') +extractPlainText(e.description.encode('utf8'))#for python 2.x
     11                 words = separatewords(txt)
     12 

TypeError: can't concat str to bytes

###############################################################

def combineWordsFromFeed(filename):
    """Write the filtered title words of every feed entry to ``filename``.

    Second attempt under Python 3.x: only the title is used and it is still
    encoded to bytes. The concatenation error is gone, but the call to
    ``separatewords`` now fails with "TypeError: cannot use a string pattern on
    a bytes-like object" (see the traceback below), because that helper splits
    its argument with a regex compiled from a str pattern.

    Relies on names defined elsewhere: ``feedlist``, ``feedparser``,
    ``separatewords`` and ``mystopwords``.
    """
    #with open(filename, 'w',encoding='utf-8') as wfile:   #for python 3.x
    with open(filename, 'w') as wfile:    #for python 2.x
        for feed in feedlist:
            print("Parsing " + feed)
            fp = feedparser.parse(feed)
            
            for e in fp.entries:
                #txt = e.title+ extractPlainText(e.description)#for python 3.x
                # Under Python 3.x this makes txt a bytes object.
                txt = e.title.encode('utf8') #for python 2.x
                # Raises under Python 3.x: str regex pattern applied to bytes.
                words = separatewords(txt)
                
                #print(words)
                for word in words:
                    # Skip purely numeric tokens and stop words.
                    if word.isdigit() == False and word not in mystopwords:
                        wfile.write(word)
                        wfile.write(" ")
                # One output line per feed entry.
                wfile.write("\n")
    # Redundant: the with-statement has already closed the file at this point.
    wfile.close()
    return

output:

TypeError                                 Traceback (most recent call last)
<ipython-input-67-08ed4df9ed8a> in <module>()
----> 1 combineWordsFromFeed("wordcloudInput_FromFeeds.txt")

<ipython-input-66-bfa60f6626f9> in combineWordsFromFeed(filename)
      9                 #txt = e.title+ extractPlainText(e.description)#for python 3.x
     10                 txt = e.title.encode('utf8')
---> 11                 words = separatewords(txt)
     12
     13                 #print(words)

<ipython-input-63-06f3809616f7> in separatewords(text)
      1 def separatewords(text=' '):
      2     splitter = re.compile('\\W*')
----> 3     return [s.lower() for s in splitter.split(text) if len(s) >3]

TypeError: cannot use a string pattern on a bytes-like object

###############################################################

solution:

def combineWordsFromFeed(filename):
    """Write the filtered words of every feed entry to ``filename`` (Python 3.x).

    For each URL in the module-level ``feedlist`` the feed is parsed with
    ``feedparser.parse``. For every entry, the title and the plain text
    extracted from the description are concatenated and split into words by
    ``separatewords``. Words that are purely numeric or listed in
    ``mystopwords`` are dropped; the rest are written separated by single
    spaces, one output line per entry.

    Everything stays in ``str`` and the encoding is applied once, by the file
    object. The Python 2.x form instead encodes each piece by hand:
        with open(filename, 'w') as wfile:
        txt = e.title.encode('utf8') + extractPlainText(e.description.encode('utf8'))

    Args:
        filename: Path of the output file; it is created or overwritten and
            written as UTF-8.
    """
    with open(filename, 'w', encoding='utf-8') as wfile:
        for feed in feedlist:
            print("Parsing " + feed)
            fp = feedparser.parse(feed)

            for e in fp.entries:
                # No .encode() here: mixing bytes and str is what raised
                # the TypeErrors shown above.
                txt = e.title + extractPlainText(e.description)

                for word in separatewords(txt):
                    if not word.isdigit() and word not in mystopwords:
                        wfile.write(word + " ")
                wfile.write("\n")
    # No explicit close(): the with-statement closes the file on exit.

###############################################################

# Under Python 2.x

def combineWordsFromFeed(filename):
    """Write the filtered words of every feed entry to ``filename``, one entry per line.

    This is the Python 3.x form of the function, shown here being run under
    Python 2.x. It fails immediately on the ``open`` call with
    "TypeError: 'encoding' is an invalid keyword argument for this function"
    (see the traceback below).

    Relies on names defined elsewhere: ``feedlist``, ``feedparser``,
    ``extractPlainText``, ``separatewords`` and ``mystopwords``.
    """
    # Under Python 2.x the built-in open() rejects the encoding keyword.
    with open(filename, 'w',encoding='utf-8') as wfile:   #for python 3.x
    #with open(filename, 'w') as wfile:    #for python 2.x
        for feed in feedlist:
            print("Parsing " + feed)
            fp = feedparser.parse(feed)
            
            for e in fp.entries:
                txt = e.title+ extractPlainText(e.description)#for python 3.x
                #txt = e.title.encode('utf8') +extractPlainText(e.description.encode('utf8'))#for python 2.x
                words = separatewords(txt)
                
                #print(words)
                for word in words:
                    # Skip purely numeric tokens and stop words.
                    if word.isdigit() == False and word not in mystopwords:
                        wfile.write(word)
                        wfile.write(" ")
                # One output line per feed entry.
                wfile.write("\n")
    # Redundant: the with-statement has already closed the file at this point.
    wfile.close()
    return

TypeError                                 Traceback (most recent call last)
<ipython-input-7-08ed4df9ed8a> in <module>()
----> 1 combineWordsFromFeed("wordcloudInput_FromFeeds.txt")

<ipython-input-6-26ff6499205a> in combineWordsFromFeed(filename)
      1 def combineWordsFromFeed(filename):
----> 2     with open(filename, 'w',encoding='utf-8') as wfile:   #for python 3.x
      3     #with open(filename, 'w') as wfile:    #for python 2.x
      4         for feed in feedlist:
      5             print("Parsing " + feed)

TypeError: 'encoding' is an invalid keyword argument for this function

solution:

def combineWordsFromFeed(filename):
    """Write the filtered words of every feed entry to ``filename`` (Python 2.x).

    For each URL in the module-level ``feedlist`` the feed is parsed with
    ``feedparser.parse``. For every entry, the UTF-8 encoded title and the
    plain text extracted from the UTF-8 encoded description are concatenated
    and split into words by ``separatewords``. Words that are purely numeric
    or listed in ``mystopwords`` are dropped; the rest are written separated
    by single spaces, one output line per entry.

    Under Python 2.x the built-in ``open`` has no ``encoding`` keyword, so the
    text is encoded by hand before it is written. The Python 3.x form is:
        with open(filename, 'w', encoding='utf-8') as wfile:
        txt = e.title + extractPlainText(e.description)

    Args:
        filename: Path of the output file; it is created or overwritten.
    """
    with open(filename, 'w') as wfile:
        for feed in feedlist:
            print("Parsing " + feed)
            fp = feedparser.parse(feed)

            for e in fp.entries:
                # Encode both pieces to UTF-8 so the concatenated text can be
                # written straight to the file opened without an encoding.
                txt = e.title.encode('utf8') + extractPlainText(e.description.encode('utf8'))

                for word in separatewords(txt):
                    if not word.isdigit() and word not in mystopwords:
                        wfile.write(word + " ")
                wfile.write("\n")
    # No explicit close(): the with-statement closes the file on exit.

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值