c++基于socket的图片爬虫

本文介绍了如何在CodeBlocks环境下,利用C++和socket编程技术开发一款图片爬虫,涉及引入ws2_32.dll库的支持。
摘要由CSDN通过智能技术生成
#include <iostream>
#include <winsock2.h>
#include <fstream>
#include <string>
#include <sstream>
using namespace std;

// Opens a TCP (IPv4) connection to a host.
//
// url  - host name to resolve; despite its name it must NOT contain a scheme
//        or a path, because it is handed to gethostbyname() unchanged.
// port - TCP port in host byte order.
//
// Returns the connected socket handle (narrowed to int, as before), or -1
// when Winsock cannot be initialised, the socket cannot be created, the host
// cannot be resolved, or the connection attempt fails. On every failure path
// the socket is closed here, so a -1 result leaves nothing for the caller to
// release.
//
// NOTE: WSAStartup is called on every invocation and this function never
// calls WSACleanup.
int makeSocket(std::string url,int port)
{
    WSADATA wsadata;
    if(WSAStartup(0x202,&wsadata)!=0)
    {
        std::cout<<"connect_error"<<std::endl;
        return -1;
    }

    SOCKET sk = socket(AF_INET,SOCK_STREAM,0);
    if(sk==INVALID_SOCKET)
    {
        std::cout<<"connect_error"<<std::endl;
        return -1;
    }

    // gethostbyname returns a null pointer when resolution fails; the
    // original dereferenced it unconditionally.
    hostent *host = gethostbyname(url.c_str());
    if(host==nullptr||host->h_addr==nullptr)
    {
        std::cout<<"connect_error"<<std::endl;
        closesocket(sk);
        return -1;
    }

    sockaddr_in saddr = {};   // zero-fill so the padding bytes are defined
    saddr.sin_family = AF_INET;
    saddr.sin_port = htons(static_cast<u_short>(port));
    memcpy(&saddr.sin_addr,host->h_addr,sizeof(saddr.sin_addr));

    if(connect(sk,(sockaddr *)&saddr,sizeof(saddr))==SOCKET_ERROR)
    {
        std::cout<<"connect_error"<<std::endl;
        // Report the failure to the caller instead of handing back an
        // unconnected socket.
        closesocket(sk);
        return -1;
    }

    std::cout<<"connected"<<std::endl;
    return static_cast<int>(sk);
}

// Turns a URL path into a flat file name by replacing every '/' with '_'.
// The argument is taken by value, so the caller's string is left untouched.
std::string getName(std::string name)
{
    for(char &ch : name)
    {
        if(ch=='/')
        {
            ch = '_';
        }
    }
    return name;
}

// Downloads one resource over an already-connected socket and stores the
// response body under "imgs/".
//
// sk   - connected socket; it is always closed before this function returns.
// url  - host name, sent in the Host header.
// name - request path; also used (with '/' replaced by '_') as the file name.
//
// The response headers are skipped by searching for the blank line
// ("\r\n\r\n") that ends them; everything after it is written to the file
// unchanged. The headers may arrive split across several recv() calls, so
// received bytes are accumulated until the separator is seen.
//
// NOTE(review): the status line is not inspected and the body is not decoded,
// so an error page or a chunked response is saved as-is. The "imgs/"
// directory is not created here.
void saveImg(SOCKET sk,std::string url,std::string name)
{
    std::string request = "GET "+name;
    request += " HTTP/1.1\r\n";
    request += "Host:"+url+"\r\n";
    request += "connection:close\r\n";
    request += "\r\n";
    if(send(sk,request.c_str(),static_cast<int>(request.size()),0)==SOCKET_ERROR)
    {
        std::cout<<"send_error"<<std::endl;
        closesocket(sk);
        return ;
    }
    std::cout<<"sended"<<std::endl;

    std::fstream out;
    out.open("imgs/"+getName(name),std::ios::out|std::ios::binary);
    if(!out)
    {
        std::cout<<"file open ERROR"<<std::endl;
        closesocket(sk);   // the original leaked the socket on this path
        return ;
    }

    const std::string separator = "\r\n\r\n";
    char rebuf[1024];
    std::string received;                              // bytes seen while looking for the header end
    std::string::size_type headerEnd = std::string::npos;

    int n = recv(sk,rebuf,sizeof(rebuf),0);
    std::cout<<n<<std::endl;
    while(n>0)
    {
        // Re-scan only the tail that could contain a separator straddling
        // the previous and the current chunk.
        const std::string::size_type from =
            received.size()>separator.size()-1 ? received.size()-(separator.size()-1) : 0;
        received.append(rebuf,n);
        headerEnd = received.find(separator,from);
        if(headerEnd!=std::string::npos)
            break;
        n = recv(sk,rebuf,sizeof(rebuf),0);
    }

    if(headerEnd==std::string::npos)
    {
        // Connection failed or closed before a complete header block arrived.
        std::cout<<"recv_error"<<std::endl;
        out.close();
        closesocket(sk);
        return ;
    }

    // Write whatever part of the body arrived together with the headers.
    const std::string::size_type bodyStart = headerEnd+separator.size();
    out.write(received.data()+bodyStart,received.size()-bodyStart);

    // The request asked for "connection:close", so the body runs until the
    // peer closes the connection (recv returns 0) or an error occurs.
    while((n=recv(sk,rebuf,sizeof(rebuf),0))>0)
    {
        out.write(rebuf,n);
    }
    out.close();
    closesocket(sk);
}

// Splits a scheme-less URL ("host/path...") at its first '/'.
//
// url     - input such as "www.example.com/a/b.html".
// uri     - receives the part before the first '/' (the host).
// content - receives the rest, including the leading '/' (the request path).
//
// When url contains no '/', the whole string is treated as the host and the
// path defaults to "/". The original threw std::out_of_range in that case.
void urlHandle(std::string url,std::string &uri,std::string &content)
{
    const std::string::size_type slash = url.find('/');
    if(slash==std::string::npos)
    {
        uri = url;
        content = "/";
        return;
    }
    uri = url.substr(0,slash);
    content = url.substr(slash);
}

// Fetches a page over plain HTTP on port 80 and returns the raw response.
//
// url - scheme-less URL ("host/path"); it is split with urlHandle().
//
// Returns everything received from the server, i.e. the HTTP status line and
// headers followed by the body. Returns an empty string when the connection
// could not be opened or the request could not be sent.
std::string getHtml(std::string url)
{
    std::string uri,content;
    urlHandle(url,uri,content);

    SOCKET skimg = makeSocket(uri,80);
    if(skimg==INVALID_SOCKET)
    {
        return "";
    }

    std::string data = "GET "+content +" HTTP/1.1\r\n";
    data += "Host:"+uri + "\r\n";
    data += "connection:close\r\n";
    data += "\r\n";
    if(send(skimg,data.c_str(),static_cast<int>(data.size()),0)==SOCKET_ERROR)
    {
        std::cout<<"SEND error"<<std::endl;
        closesocket(skimg);
        return "";
    }
    std::cout<<"sended"<<std::endl;

    std::string html;
    char buf[1024*5];
    int n = 0;
    // Stop on 0 (peer closed) AND on SOCKET_ERROR (-1). The original tested
    // "!=0", which loops forever once recv starts failing.
    while((n=recv(skimg,buf,sizeof(buf),0))>0)
    {
        html.append(buf,n);
    }
    closesocket(skimg);
    return html;
}

// Extracts the text enclosed by two markers.
//
// line - text to search.
// head - marker that precedes the wanted text (first occurrence is used).
// rear - marker that follows it; searched for only AFTER the end of head.
//
// Returns the characters between head and the next rear, or an empty string
// when either marker is missing. The original looked for rear from index 1 of
// the line, so a rear that appeared before head produced a negative length
// and a garbage result.
std::string reg(std::string line,std::string head,std::string rear)
{
    const std::string::size_type headPos = line.find(head);
    if(headPos==std::string::npos)
        return "";

    const std::string::size_type start = headPos+head.size();
    const std::string::size_type rearPos = line.find(rear,start);
    if(rearPos==std::string::npos)
        return "";

    return line.substr(start,rearPos-start);
}

// Collects gallery links from a listing page.
//
// html     - page text; it is scanned line by line.
// imgsUrl  - output array; entry k receives the k-th link found.
// maxCount - capacity of imgsUrl. Scanning stops once this many links have
//            been stored, so a page with many matches can no longer write
//            past the end of the array. Defaults to 100.
//
// A link is the text between `</strong><a href="` and `" title` on one line.
// Returns the number of links stored.
int getImgsUrl(std::string html,std::string imgsUrl[],int maxCount = 100)
{
    int found = 0;
    std::stringstream ss(html);
    std::string line;
    while(found<maxCount && std::getline(ss,line))
    {
        const std::string res = reg(line,"</strong><a href=\"","\" title");
        if(!res.empty())
        {
            imgsUrl[found] = res;
            ++found;
        }
    }
    return found;
}

// Collects image URLs from a gallery page.
//
// html     - page text; it is split into pieces at every ';'.
// imgUrl   - output array; entry k receives the k-th URL found.
// maxCount - capacity of imgUrl. Scanning stops once this many URLs have
//            been stored, so the array cannot be overrun. Defaults to 100.
//
// A URL is the text between `<br><img src="http://` and `" >&nbsp` inside one
// piece, so the stored value has no "http://" prefix.
// Returns the number of URLs stored.
int getAnImgUrl(std::string html,std::string imgUrl[],int maxCount = 100)
{
    int found = 0;
    std::stringstream ss(html);
    std::string piece;
    while(found<maxCount && std::getline(ss,piece,';'))
    {
        // "\/" is not a standard escape sequence; a plain '/' is what was meant.
        const std::string res = reg(piece,"<br><img src=\"http://","\" >&nbsp");
        if(!res.empty())
        {
            imgUrl[found] = res;
            ++found;
        }
    }
    return found;
}


// Entry point: fetches the listing page, prints every gallery link found on
// it, then visits each gallery and saves every image it references.
int main()
{
    const std::string listingUrl = "www.46eh.com/html/part/17_5.html";
    const std::string listingHtml = getHtml(listingUrl);

    std::string galleryLinks[100];
    const int galleryCount = getImgsUrl(listingHtml,galleryLinks);
    for(int g = 0;g<galleryCount;++g)
    {
        std::cout<<g<<"   "<<galleryLinks[g]<<std::endl;
    }

    // Gallery links are site-relative, so they are appended to the listing's host.
    std::string siteHost,listingPath;
    urlHandle(listingUrl,siteHost,listingPath);

    for(int g = 0;g<galleryCount;++g)
    {
        const std::string galleryHtml = getHtml(siteHost + galleryLinks[g]);

        std::string imageUrls[100];
        const int imageCount = getAnImgUrl(galleryHtml,imageUrls);
        for(int k = 0;k<imageCount;++k)
        {
            std::cout<<imageUrls[k]<<std::endl;

            std::string imageHost,imagePath;
            urlHandle(imageUrls[k],imageHost,imagePath);

            SOCKET sk = makeSocket(imageHost,80);
            if(sk==-1)
            {
                continue;
            }
            // saveImg closes the socket when it is done.
            saveImg(sk,imageHost,imagePath);
        }
    }
    return 0;
}

这里写图片描述

仅仅 #include <winsock2.h> 之后,socket 相关函数在链接阶段仍会报"未定义的引用"而无法使用,这是因为工程没有链接 ws2_32 库(MinGW 下的导入库为 libws2_32.a,对应系统的 ws2_32.dll)。

在 CodeBlocks 中引入 ws2_32 库(Project → Build options → Linker settings,在 Link libraries 中添加 ws2_32)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值