抖音视频评论提取源码第二篇
一:概述:
此篇文章讲解整套系统的源码逻辑和流程
二:架构:
抖音视频评论采集云端版采用客户端和采集端分离的方式进行开发。
客户端:用于用户提交要采集的内容任务和显示数据。 无需登录抖音号。
采集端:用于接收客户端提交过来的数据,并且处理数据。
三:关键词批量解析任务思路
3.1:解析视频数据服务端:客户端创建关键词搜索任务后,服务端首先要做的是通过自动化操作搜索该关键词,然后获取搜索结果中各视频的videoid。获取到videoid后将其储存,然后进入评论提取。自动化操作包括:自动打开搜索页面,自动搜索,自动下拉获取下一页视频,以及循环提取videoid并去重。
视频解析源码
服务端完成后,客户端可以任意实现:手机端,B/S网页端或桌面软件版均可。
如果要写单机版本,将客户端和采集端整合在一起即可(其它流程将在后面的文章中讲解)。
下拉代码
// Scrolls the page loaded in chromeBrowser to the bottom ("refresh videos").
// The script is dispatched asynchronously; this method does not wait for it
// to finish. Presumably the scroll makes the page load the next batch of
// search results -- confirm against the page's behavior.
private void xiala()
{
    // JavaScript that jumps to the very bottom of the document.
    const string scrollToBottomJs = "window.scrollTo(0, document.body.scrollHeight);";
    chromeBrowser.ExecuteScriptAsync(scrollToBottomJs);
}
private void video_id_url()//循环获取视频
{
int video_id_data_count = 0;
string htmlContent = chromeBrowser.GetSourceAsync().Result;
jieshu = htmlContent;
string input = htmlContent;
string input1 = input;
// < li class="MgWTwktU B9KMVC9A">
string pattern = "<li class=\"SwZLHMKk SEbmeLLH\">(.*?)</li>";
// string pattern = "<li class=\"HN50D2ec Z3LKqldT\">(.*?)</li>";
// string pattern = "<li class=\"MgWTwktU B9KMVC9A\">(.*?)</li>";
// string pattern = "<li class=\"MgWTwktU search-result-card B9KMVC9A\">(.*?)</li>";
MatchCollection matches = Regex.Matches(input1, pattern);
foreach (Match match in matches)
{
string aaaaa = match.Groups[1].Value;
string url = aaaaa;
string pattern1 = @"\/video\/(\d+)";
Match match1 = Regex.Match(url, pattern1);
if (match1.Success)
{
string id = match1.Groups[1].Value;
string li_id = "";
int li_count = 0;
// while (li_count<listBox1.Items.Count)
OleDbCommand comm = new OleDbCommand("select count (*) from video_id where xnumbers='" + t_xnumbers.Text.Trim() + "' and mess='" + id + "'", ole_con);
ole_con.Open();
OleDbDataReader dr = comm.ExecuteReader();
if (dr.Read())
{
try
{
video_id_data_count = Convert.ToInt32(dr.GetValue(0));
}
catch
{
video_id_data_count = 0;
}
}
ole_con.Close();
if (video_id_data_count == 0)
{
OleDbCommand comm1 = new OleDbCommand("insert into video_id (mess,xnumbers) values ('" + id + "','" + t_xnumbers.Text.Trim() + "')", ole_con);
ole_con.Open();
comm1.ExecuteNonQuery();
ole_con.Close();
listBox1.Invoke(new MethodInvoker(() =>
{
string li_name = "";
int count_li_d = 0;
SqlCommand comm6_1 = new SqlCommand("select count (*) from li_d where username='" + t_username.Text.Trim() + "'", conn);
conn.Open();
SqlDataReader dr_61 = comm6_1.ExecuteReader();
if (dr_61.Read())
{
try
{
count_li_d = Convert.ToInt32(dr_61.GetValue(0));
}
catch
{
count_li_d = 0;
}
}
else
{
count_li_d = 0;
}
conn.Close();
if (count_li_d == 0)
{
SqlCommand comm6 = new SqlCommand("select name,id from li ORDER BY NEWID() ", conn);
conn.Open();
SqlDataReader dr6 = comm6.ExecuteReader();
if (dr6.Read())
{
li_name = dr6.GetString(0).Trim();
}
conn.Close();
}
else
{
SqlCommand comm6 = new SqlCommand("select name,id from li_d where username='" + t_username.Text.Trim() + "' ORDER BY NEWID() ", conn);
conn.Open();
SqlDataReader dr6 = comm6.ExecuteReader();
if (dr6.Read())
{
li_name = dr6.GetString(0).Trim();
}
conn.Close();
}
//else
//{
// SqlCommand comm6 = new SqlCommand("select name,id from li1 ORDER BY NEWID() ", conn);
// conn.Open();
// SqlDataReader dr6 = comm6.ExecuteReader();
// if (dr6.Read())
// {
// li_name = dr6.GetString(0).Trim();
// }
// conn.Close();
//}
int li = 0;
if (t_username.Text.Trim() == "he")
{
li = 1;
}
else
{
li = 0;
}
SqlCommand comm2 = new SqlCommand("insert into video_id (username,xnumbers,video_id,li_name,dingshi,li) values ('" + t_username.Text.Trim() + "','" + t_xnumbers.Text.Trim() + "','" + id +