weibo_json

INDENT = 4


def format_json_lines(lines, indent=INDENT):
    """Re-indent compact JSON-like text by tracking braces and brackets.

    The text is scanned character by character.  Nesting depth is the sum
    of the currently open ``{`` and of the open ``[`` that directly
    contain objects; each depth level is rendered as ``indent`` spaces.
    An array whose first element is not an object is copied through
    verbatim up to the next ``]``.

    Characters inside string literals are not treated specially, so
    braces, brackets or commas inside quoted values are interpreted as
    structure as well.

    Args:
        lines: Iterable of text lines (for example an open text file).
            Depth state carries over from one line to the next.
        indent: Number of spaces per nesting level.

    Returns:
        The re-indented text as a single string.
    """
    brace_count = 0        # number of currently open '{'
    bracket_count = 0      # number of open '[' that directly hold objects
    in_flat_array = False  # inside an array that is copied verbatim
    pieces = []

    def pad(extra=0):
        # Indentation for the current depth; a negative depth yields ''.
        return ' ' * indent * (brace_count + bracket_count + extra)

    for line in lines:
        last = len(line) - 1
        for i, ch in enumerate(line):
            # Safe look-ahead: '' at end of line instead of an IndexError.
            nxt = line[i + 1] if i < last else ''

            if in_flat_array:
                if ch == ']':
                    in_flat_array = False
                pieces.append(ch)
            elif ch == '{':
                brace_count += 1
                pieces.append('{\n' + pad())
            elif ch == '[':
                if nxt == '{':
                    bracket_count += 1
                    pieces.append('[\n' + pad())
                else:
                    in_flat_array = True
                    pieces.append('[')
            elif ch == '}':
                brace_count -= 1
                if nxt == ',':
                    pieces.append('}')
                elif nxt == ']':
                    # The closing ']' belongs one level further out.
                    pieces.append('}\n' + pad(-1))
                else:
                    pieces.append('}\n' + pad())
            elif ch == ']':
                bracket_count -= 1
                if nxt == '}':
                    pieces.append(']\n' + pad(-1))
                else:
                    pieces.append(']\n' + pad())
            elif ch == ',':
                pieces.append(',' + pad() if nxt == '}' else ',')
            elif ch == ' ' and i > 0 and line[i - 1] == ',':
                # A space following a comma becomes a line break; the
                # i > 0 guard stops line[-1] from being consulted.
                pieces.append('\n' + pad())
            elif nxt == '}':
                # Last character before a closing brace: break the line
                # so that the brace lands on its own, outdented line.
                pieces.append(ch + '\n' + pad(-1))
            else:
                pieces.append(ch)

    return ''.join(pieces)


def main():
    """Read ``in.txt``, re-indent its contents and write ``out.txt``."""
    # Context managers close both files even if formatting raises.
    with open("in.txt", "r") as f_in, open("out.txt", "w") as f_out:
        f_out.write(format_json_lines(f_in))


if __name__ == "__main__":
    main()

转载于:https://www.cnblogs.com/haoqingchuan/archive/2013/03/01/2938933.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
import datetime
import json
import re
import time

import pymysql
import requests
import selenium
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import Edge, EdgeOptions
from selenium.webdriver.support.ui import WebDriverWait
from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Create the browser object.
options = EdgeOptions()
options.use_chromium = True
# NOTE(review): binary_location is pointed at msedgedriver.exe here; confirm
# whether the browser executable was intended instead of the driver.
options.binary_location = r'C:\Users\邓枫林\PycharmProjects\pythonProject\edgedriver_win64\msedgedriver.exe'
browser = Edge(options=options)
wait = WebDriverWait(browser, 10)

# Open the Weibo topic page.
url = 'https://weibo.com/n/%E4%B8%AD%E5%9B%BD%E9%A3%9F%E5%93%81%E5%8D%AB%E7%94%9F?from=feed&loc=at&nick=%E4%B8%AD%E5%9B%BD%E9%A3%9F%E5%93%81%E5%8D%AB%E7%94%9F&order=hot'
browser.get(url)

# Wait until the page has finished loading.
wait.until(lambda driver: driver.execute_script("return document.readyState") == "complete")

# NOTE(review): `browser` and `wait` are rebound to a second Edge instance
# after the page was loaded in the first one. The new instance never
# navigates to `url`, and the first instance is never quit -- confirm intent.
browser = selenium.webdriver.Edge(executable_path='C:/Users/邓枫林/PycharmProjects/pythonProject/edgedriver_win64/msedgedriver.exe')
wait = selenium.webdriver.support.ui.WebDriverWait(browser, 10)

# Check whether the page contains the sensitive term '高校类'.
if '高校类' in browser.page_source:
    # Fetch the original post.
    weibo = browser.find_element_by_css_selector('.WB_feed_detail .WB_text.W_f14').text

    # Collect the nickname and repost text of every user who reposted it.
    reposts = []
    repost_items = browser.find_elements_by_css_selector('.list_ul .list_li')
    for item in repost_items:
        nickname = item.find_element_by_css_selector('.WB_text.W_f14').text
        content = item.find_element_by_css_selector('.WB_text.W_f14 + .comment_txt').text
        reposts.append({'nickname': nickname, 'content': content})

    # Close the browser.
    browser.quit()

    # Store the post and its reposts in the MySQL database.
    Base = declarative_base()

    class Weibo(Base):
        __tablename__ = 'weibo_user'
        id = Column(Integer, primary_key=True)
        content = Column(Text)
        create_time = Column(DateTime)

    class Repost(Base):
        __tablename__ = 'weibo_repost'
        id = Column(Integer, primary_key=True)
        weibo_id = Column(Integer)
        nickname = Column(String(50))
        content = Column(Text)

    # NOTE(review): `hostname:port` look like placeholders -- replace them
    # with the real database host and port before running.
    engine = create_engine('mysql+pymysql://root:root@hostname:port/weibo?charset=utf8mb4')
    Session = sessionmaker(bind=engine)
    session = Session()

    now = datetime.datetime.now()
    weibo_obj = Weibo(content=weibo, create_time=now)
    session.add(weibo_obj)
    session.commit()

    for repost in reposts:
        repost_obj = Repost(weibo_id=weibo_obj.id, nickname=repost['nickname'], content=repost['content'])
        session.add(repost_obj)
        session.commit()

    session.close()
else:
    # Close the browser.
    browser.quit()
最新发布
06-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值