// gj 更新 ("gj update") -- title line left over from the blog post this file was copied from.

#include "stdafx.h"
#include "Test.h"
#include "TestDlg.h"
#include <afxinet.h>

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
/*========================================================================*/

// 深圳公交         "http://shenzhen.8684.cn/"
// "981路"          "x_7ca8d133" 测试 网页内容没有抓全的问题
// "209路区间快车"  "x_32245c22" 测试 抓取回程宕机的问题,这是单程线路
// "1路"            "x_24f5dad9" 抓取数据的起始线路,其实随便一个线路都可以
// "220路"          "x_d397d994" 测试 遇到带"站)"的站台后,回程线路获取不全的问题
// "高峰专线41"     "x_28a86909" 单向行驶 未抓取到数据
// "假日专线(横岗-大梅沙)"  "x_2e54c4c0"    回程6站。多加了1站。


// 上海公交         "http://shanghai.8684.cn/"
// "01路"           "x_ccc80acf"

// 北京公交         "http://beijing.8684.cn"
// "1路"            "x_24f5dad9"

// 济南             "http://jinan.8684.cn/"
// "1路"            "x_24f5dad9"

// Scrape configuration used by GetBusInfo() and GetFileName().
// Base URL of the city site; a line's page id is appended to it, and the
// output file name is derived from it.
#define STR_HTTP_URL    "http://shenzhen.8684.cn/"
// Display name of the seed bus line the crawl starts from.
#define STR_BUS         "假日专线(横岗-大梅沙)"
// Page id of the seed bus line.
#define STR_URL         "x_2e54c4c0"
// Marker text searched for in the page: start of the outbound section.
#define STR_GO          "去程"
// Marker text searched for in the page: start of the return section.
#define STR_BACK        "回程"
// Marker text searched for in the page when no outbound marker is present.
#define STR_SINGLE      "单向行驶"
// Marker text searched for in the page: start of the related-lines block.
#define STR_LINE        "相关线路"
// Closing anchor tag; ends a line name and separates related-line links.
#define STR_HREF_END    "</a>"

// Constructs the dialog, passing the dialog template id (CTestDlg::IDD) and
// the parent window on to CDialog. No members are initialised here.
CTestDlg::CTestDlg(CWnd* pParent /*=NULL*/)
 : CDialog(CTestDlg::IDD, pParent)
{
 //{{AFX_DATA_INIT(CTestDlg)
 //}}AFX_DATA_INIT
}

// Binds dialog controls to member variables: IDC_PROGRESS1 to m_pp and
// IDC_LIST1 to m_list, after letting CDialog perform its own exchange.
void CTestDlg::DoDataExchange(CDataExchange* pDX)
{
 CDialog::DoDataExchange(pDX);
 //{{AFX_DATA_MAP(CTestDlg)
 DDX_Control(pDX, IDC_PROGRESS1, m_pp);
 DDX_Control(pDX, IDC_LIST1, m_list);
 //}}AFX_DATA_MAP
}

// Message map: a click on IDC_BUTTON1 is routed to OnButton1, and loss of
// focus on IDC_EDIT1 is routed to OnKillfocusEdit1.
BEGIN_MESSAGE_MAP(CTestDlg, CDialog)
 //{{AFX_MSG_MAP(CTestDlg)
 ON_BN_CLICKED(IDC_BUTTON1, OnButton1)
 ON_EN_KILLFOCUS(IDC_EDIT1, OnKillfocusEdit1)
 //}}AFX_MSG_MAP
END_MESSAGE_MAP()

/*========================================================================*/


// Initial upper bound for the progress bar: OnInitDialog sets the range to 1..MAX+1.
#define MAX  10

// Prepares the dialog's controls: row icon, list font, list colours and
// columns, and the progress bar. Always returns TRUE.
BOOL CTestDlg::OnInitDialog()
{
 CDialog::OnInitDialog();

 // Small-icon image list holding the single icon shown on every row.
 m_imagelist.Create(16, 16, TRUE, 2, 2);
 HICON hRowIcon = AfxGetApp()->LoadIcon(IDI_ICON1);
 m_imagelist.Add(hRowIcon);
 m_list.SetImageList(&m_imagelist, LVSIL_SMALL);

 // 16-unit high, normal-weight Arial for the list control.
 m_font.CreateFont(
  16, 0, 0, 0,
  FW_NORMAL,
  0, 0, 0,
  DEFAULT_CHARSET,
  OUT_CHARACTER_PRECIS,
  CLIP_CHARACTER_PRECIS,
  DEFAULT_QUALITY,
  DEFAULT_PITCH | FF_DONTCARE,
  "Arial");
 m_list.SetFont(&m_font);

 // List appearance: full-row selection with grid lines, blue text on a
 // pale background.
 const COLORREF clrBackground = RGB(247,247,255);
 const COLORREF clrText = RGB(0,0,255);
 m_list.SetExtendedStyle(LVS_EX_FULLROWSELECT | LVS_EX_GRIDLINES);
 m_list.SetBkColor(clrBackground);
 m_list.SetTextColor(clrText);
 m_list.SetTextBkColor(clrBackground);

 // Columns: sequence number, bus line name, download status.
 m_list.InsertColumn(0, "序号", LVCFMT_LEFT, 50);
 m_list.InsertColumn(1, "公交路线", LVCFMT_LEFT, 350);
 m_list.InsertColumn(2, "下载状态", LVCFMT_LEFT, 150);

 // Progress bar starts with the range 1..MAX+1, position 0, step 1.
 m_pp.SetRange(1,MAX+1);
 m_pp.SetPos(0);
 m_pp.SetStep(1);

 return TRUE;
}

void CTestDlg::OnButton1()
{
    CString strURL;
    strURL="http://www.baidu.com/img/baidu_logo.gif";
    int nIndex=m_list.InsertItem(0xffff,"0",0);
    m_list.SetItemText(nIndex,1,strURL);
    if(::URLDownloadToFile(NULL,strURL,"baidu_logo.gif",0,NULL) == S_OK)
    {
        m_list.SetItemText(0,2,"文件下载完成!");
    }
    else
    {
        m_list.SetItemText(0,2,"文件下载失败...");
    }
   
    GetBusInfo();
    SaveToFile();

    MessageBox("下载完成!", "公交信息", MB_ICONASTERISK | MB_OK);
}

// Handler wired to EN_KILLFOCUS of IDC_EDIT1 in the message map; it currently
// has an empty body.
void CTestDlg::OnKillfocusEdit1()
{
}


// Downloads the document at strURL and returns its text, with lines joined
// by '\n'. A copy of the text is also dumped to "3.txt" for debugging.
//
// Parameters:
//   strURL - absolute URL to fetch.
// Returns:
//   The page text, or an empty string when the URL cannot be opened or an
//   internet error occurs while reading, so that a single bad page does not
//   abort the caller's crawl.
CString getHTML(CString strURL)
{
    CString strHtml;
    CInternetSession mySession(NULL, 0);
    CHttpFile* myHttpFile = NULL;
    bool bFetched = false;

    try
    {
        myHttpFile = static_cast<CHttpFile*>(mySession.OpenURL(strURL));
        if (myHttpFile != NULL)
        {
            CString myData;
            while (myHttpFile->ReadString(myData))
            {
                strHtml += myData + "\n";
            }
            // ReadString sometimes returns FALSE even though it did read
            // content, so whatever is left in the line buffer is appended too.
            strHtml += myData;

            myHttpFile->Close();
            bFetched = true;
        }
    }
    catch (CInternetException* pEx)
    {
        // MFC exceptions are thrown as pointers and must be released with Delete().
        pEx->Delete();
        strHtml.Empty();
    }

    // The object returned by OpenURL is owned by the caller.
    delete myHttpFile;
    mySession.Close();

    if (bFetched)
    {
        WriteFile(strHtml, "3.txt");
    }

    return strHtml;
}

// Writes the NUL-terminated text pszContent to the file pszFilename, opened
// in "w+t" mode. Nothing happens when the file cannot be opened.
void WriteFile(const char * pszContent, const char * pszFilename)
{
    FILE * out = fopen(pszFilename, "w+t");
    if (out == NULL)
    {
        return;
    }

    const size_t length = strlen(pszContent);
    fwrite(pszContent, sizeof(char), length, out);
    fclose(out);
}


void CTestDlg::GetBusInfo()
{
    CString url = STR_HTTP_URL;
    CString tmp;
    /*static*/ char szTmp[1024*16]; // 使用静态变态,不占用函数栈中的内存
    queue<string> queBus;
    queBus.push(STR_BUS);

//#define SAVE_FILE

#ifdef SAVE_FILE
    FILE * fp;
    if( (fp = fopen(GetFileName(), "w+t")) == NULL)
        return;
#endif

    Bus_t  t;
    t.bSaved = false;
    t.url = STR_URL;
    t.busName = STR_BUS;
    m_mapBus.insert(pair<string, Bus_t>(FormatBusLineToOrder(STR_BUS), t));

    int nL;
    int i = 1;
    int j = 1;  // 如果不下载 baidu_logo.gif, 这里改为 0
    CString strI;
    strI.Format("%d",i++);
    int nIndex=m_list.InsertItem(0xffff,strI,0);
    m_list.SetItemText(nIndex,1,STR_BUS);
    UpdateWindow();

    CString strBusLine;     // 线路名称,为取回程信息而用

    while(!queBus.empty())
    {
        string b = queBus.front();
        queBus.pop();

        map<string, Bus_t>::iterator ibus = m_mapBus.find(FormatBusLineToOrder(b.c_str()));
        if (ibus != m_mapBus.end())
        {
            Bus_t & bsecond = ibus->second;
            CString html = getHTML(url + bsecond.url.c_str());
           
            // 解析html
            int pos = html.Find(bsecond.url.c_str());
            if (pos != -1)
            {
                html.Delete(0, pos);
                while(html.GetAt(0) != '>') html.Delete(0);
                if (html.GetAt(0) == '>')   html.Delete(0);

                // 线路名称
                int p1 = html.Find(STR_HREF_END);
                if (p1 != -1)
                {
                    strBusLine = html.Left(p1);
                }
            }

            // 取 线路简介 信息
            pos = html.Find(STR_GO);
            if (pos != -1)
            {
                tmp = html.Left(pos);
                nL = tmp.GetLength();
                strcpy(szTmp, tmp);     // 需要注意不要拷贝越界
                StripTags(szTmp);
                bsecond.info = szTmp;   // 简介

                html.Delete(0, pos);
                while(html.GetAt(0) != '>') html.Delete(0);
                if (html.GetAt(0) == '>')   html.Delete(0);
            }
            else
            {
                // 对单向行驶线路的处理
                pos = html.Find(STR_SINGLE);
                if(pos != -1)
                {
                    tmp = html.Left(pos);
                    nL = tmp.GetLength();
                    strcpy(szTmp, tmp);     // 需要注意不要拷贝越界
                    StripTags(szTmp);
                    bsecond.info = szTmp;   // 简介
                   
                    html.Delete(0, pos);
                    while(html.GetAt(0) != '>') html.Delete(0);
                    if (html.GetAt(0) == '>')   html.Delete(0);
                }
            }
           
            // 取 去程 信息
            pos = html.Find(STR_BACK);
            if (pos != -1)
            {
                tmp = html.Left(pos);
                nL = tmp.GetLength();
                strcpy(szTmp, tmp);
                StripTags(szTmp);
                // 去程
                SplitStations(szTmp, bsecond.stationsGo);

                html.Delete(0, pos);
                while(html.GetAt(0) != '>') html.Delete(0);
                if (html.GetAt(0) == '>')   html.Delete(0);
            }

            // 取 回程 信息
            pos = html.Find(strBusLine);
            if (pos != -1)
            {
                tmp = html.Left(pos);
                nL = tmp.GetLength();
                strcpy(szTmp, tmp);
                StripTags(szTmp);
                // 回程
                SplitStations(szTmp, bsecond.stationsBack);

                html.Delete(0, pos);
                while(html.GetAt(0) != '>') html.Delete(0);
                if (html.GetAt(0) == '>')   html.Delete(0);
            }

            bsecond.bSaved = true;  // 该线路已经取到了

#ifdef SAVE_FILE
            SaveToFile(fp, bsecond);
#endif
            m_list.SetItemText(j++,2,"下载完成!");
            m_pp.SetPos(j-1);
            UpdateWindow();
           
            // 获取其他线路的链接
            pos = html.Find(STR_LINE);
            if (pos != -1)
            {
                // 相关线路
                html.Delete(0, pos);
                pos = html.Find("</div>");
                if (pos != -1)
                {
                    html.Delete(pos, html.GetLength()-pos);
                }

                map<string, Bus_t>::iterator ibusT;
                pos = html.Find(STR_HREF_END);
                while(pos != -1)
                {
                    tmp = html.Left(pos);
                    html.Delete(0, pos+strlen(STR_HREF_END));

                    int p1 = tmp.Find("/"");
                    tmp.Delete(0, p1+1);
                    p1 = tmp.Find("/"");

                    CString href = tmp.Left(p1);

                    strcpy(szTmp, tmp);
                    szTmp[0] = '<';
                    StripTags(szTmp);

                    ibusT = m_mapBus.find(FormatBusLineToOrder(szTmp));
                    // 之前没有该信息时,才记录,防重复
                    if (ibusT == m_mapBus.end())
                    {
                        t.url = href;
                        t.busName = szTmp;
                        m_mapBus.insert(pair<string, Bus_t>(FormatBusLineToOrder(szTmp), t));
                        queBus.push(szTmp);

                        strI.Format("%d",i++);
                        int nIndex=m_list.InsertItem(0xffff,strI,0);
                        m_list.SetItemText(nIndex,1,szTmp);
                        m_pp.SetRange(1,i);
                        m_pp.SetPos(j-1);
                        UpdateWindow();
                    }

                    pos = html.Find(STR_HREF_END);
                }
            }
        }
    }

#ifdef SAVE_FILE
    fclose(fp);
#endif
}

void CTestDlg::SaveToFile()
{
    FILE * fp;
    if( (fp = fopen(GetFileName(), "w+t")) != NULL)
    {
        map<string, Bus_t>::iterator ibus = m_mapBus.begin();
        for (; ibus != m_mapBus.end(); ++ibus)
        {
            Bus_t & t = ibus->second;
            SaveToFile(fp, t);
        }

        fclose(fp);
    }
}


// 2011-2-9 begin
void CTestDlg::SaveToFile(FILE * fp, Bus_t & t)
{
    char szCont[128];   // 需要谨慎,防止数组越界
    // 线路
    fwrite(t.busName.c_str(), sizeof(char), t.busName.length(), fp);
    fwrite("/n", 1, 1, fp);
   
    // 简介
    fwrite(" ", 1, 1, fp);
    fwrite(t.info.c_str(), sizeof(char), t.info.length(), fp);

    list<string>::iterator iSt;

    // 判断是单向还是 双向 线路
    if (!t.stationsGo.empty())
    {
        sprintf(szCont, "/n 去程/n ");
        fwrite(szCont, 1, strlen(szCont), fp);
       
        for (iSt=t.stationsGo.begin(); iSt!=t.stationsGo.end(); ++iSt)
        {
            fwrite(iSt->c_str(), 1, iSt->length(), fp);
            fwrite(" ", 1, 1, fp);
        }
        sprintf(szCont, "%d站/n 回程/n ", t.stationsGo.size());
        fwrite(szCont, 1, strlen(szCont), fp);
    }
    else
    {
        sprintf(szCont, "/n 单向行驶/n ");
        fwrite(szCont, 1, strlen(szCont), fp);
    }
   
    for (iSt=t.stationsBack.begin(); iSt!=t.stationsBack.end(); ++iSt)
    {
        fwrite(iSt->c_str(), 1, iSt->length(), fp);
        fwrite(" ", 1, 1, fp);
    }
    sprintf(szCont, "%d站/n", t.stationsBack.size());
    fwrite(szCont, 1, strlen(szCont), fp);
}

// Parses the first run of decimal digits found in str.
//
// Parameters:
//   str - NUL-terminated text to scan.
//   n   - receives the parsed value; 0 when str contains no digit.
// Returns:
//   Pointer to the character that follows the digit run, or to the
//   terminating NUL when there is no digit.
char * StrToInt(const char * str, int & n)
{
    const char * cur = str;

    // Advance to the first digit, if there is one.
    for (; *cur != 0; ++cur)
    {
        if (*cur >= '0' && *cur <= '9')
        {
            break;
        }
    }

    // Accumulate the digit run.
    int value = 0;
    for (; *cur >= '0' && *cur <= '9'; ++cur)
    {
        value = value * 10 + *cur - '0';
    }

    n = value;
    return const_cast<char *>(cur);
}

// Builds a sort key from a bus line name by zero-padding every run of
// decimal digits to at least three digits, so that keys compare in numeric
// order ("2" -> "002", which sorts before "10" -> "010"). All other
// characters are copied unchanged.
//
// Leading zeros of a run are not significant ("007" and "7" both give "007"),
// matching the "%03d" formatting of the original. Runs longer than three
// digits are kept in full. The key is built in a std::string, so neither the
// name length nor the number size can overflow a buffer or an int.
//
// Parameters:
//   sLine - NUL-terminated line name; NULL is treated as an empty name.
// Returns:
//   The sort key.
std::string FormatBusLineToOrder(const char sLine[])
{
    std::string key;
    if (sLine == NULL)
    {
        return key;
    }

    const char * p = sLine;
    while (*p != '\0')
    {
        if (*p < '0' || *p > '9')
        {
            key += *p++;
            continue;
        }

        // Find the extent of the digit run.
        const char * runBegin = p;
        while (*p >= '0' && *p <= '9')
        {
            ++p;
        }

        // Drop leading zeros, then pad back up to three digits.
        while (runBegin < p && *runBegin == '0')
        {
            ++runBegin;
        }
        const std::string::size_type digits =
            static_cast<std::string::size_type>(p - runBegin);
        if (digits < 3)
        {
            key.append(3 - digits, '0');
        }
        key.append(runBegin, digits);
    }

    return key;
}

char * CTestDlg::GetFileName()
{
    return ::GetFileName(STR_HTTP_URL);
}

// Derives a ".txt" file name from a URL that starts with "http://".
//
// The URL is copied (truncated to 128 characters) into a static buffer.
// Everything after the first '.' is replaced by "txt", and the 7 leading
// characters ("http://") are skipped:
//   "http://shenzhen.8684.cn/" -> "shenzhen.txt"
// A URL without a '.' is returned unchanged apart from the skipped prefix.
//
// Parameters:
//   url - NUL-terminated URL; NULL is treated as an empty string.
// Returns:
//   Pointer into a static buffer, overwritten by the next call. For input
//   shorter than the 7-character prefix an empty string is returned.
char * GetFileName(const char * url)
{
    const int SIZE = 128;
    const size_t PREFIX_LEN = 7;    // length of "http://"
    const int EXT_ROOM = 4;         // '.' is followed by "txt" and the NUL
    static char szFile[SIZE + 1];

    strncpy(szFile, (url != NULL) ? url : "", SIZE);
    szFile[SIZE] = '\0';

    char * dot = strchr(szFile, '.');
    if (dot != NULL)
    {
        // If the dot is too close to the end of the buffer, shorten the
        // stem so that ".txt" still fits.
        if (dot > szFile + SIZE - EXT_ROOM)
        {
            dot = szFile + SIZE - EXT_ROOM;
        }
        dot[0] = '.';
        strcpy(dot + 1, "txt");
    }

    // Never step past the terminator when the text is shorter than the prefix.
    const size_t length = strlen(szFile);
    return szFile + ((length < PREFIX_LEN) ? length : PREFIX_LEN);
}


// 2011-2-9 end

// StripTags() removes HTML tags from a NUL-terminated buffer in place:
// everything from a '<' up to and including the next '>' is dropped, and the
// remaining characters are moved towards the front of the buffer.
//
// The function keeps its "inside a tag" state in a static variable so that a
// tag spanning two consecutive buffers is handled. As a consequence, a
// buffer that ends in the middle of a tag affects the next call.
//
// Parameters:
//   pszBuffer - text to strip in place; NULL is ignored.
//
// The terminator test and write, which had been garbled to '/0', are
// restored to '\0'.
void StripTags(LPTSTR pszBuffer)
{
    static BOOL bInTag = FALSE;

    if (pszBuffer == NULL)
    {
        return;
    }

    LPTSTR pszDest = pszBuffer;
    for (LPTSTR pszSource = pszBuffer; *pszSource != '\0'; ++pszSource)
    {
        if (bInTag)
        {
            // Skip tag content; '>' ends the tag and is dropped as well.
            if (*pszSource == '>')
            {
                bInTag = FALSE;
            }
        }
        else if (*pszSource == '<')
        {
            bInTag = TRUE;
        }
        else
        {
            *pszDest++ = *pszSource;
        }
    }
    *pszDest = '\0';
}


// Splits a '-' separated list of station names and appends the names to
// lstStation. Spaces around each name are removed.
//
// After splitting, a trailing "(N站)" annotation, where N is the number of
// stations now in lstStation, is removed from the last name.
//
// Parameters:
//   pstation   - NUL-terminated text to split (not modified); NULL is ignored.
//   lstStation - list that receives the names, in order.
//
// Differences from the original implementation:
//   * the terminator literals, garbled to '/0', are restored;
//   * empty names (for example from trailing spaces or "--") are no longer
//     added as phantom stations;
//   * removing the annotation no longer throws when nothing is left of the
//     last name;
//   * the station count is formatted with a specifier matching its type.
//
// Parsing stops at the first name that does not fit the 256-byte name buffer.
void SplitStations(char * pstation, std::list<std::string>& lstStation)
{
    if (pstation == NULL)
    {
        return;
    }

    const int LENTH = 256;
    char st[LENTH];     // station names are not expected to be long

    while (*pstation != '\0')
    {
        int i = 0;
        while (*pstation == ' ') pstation++;            // skip leading spaces
        while (*pstation && *pstation != '-' && i < LENTH)
            st[i++] = *pstation++;

        // Name too long for the buffer: give up on the rest of the input.
        if (i == LENTH) break;

        while (i > 0 && st[i-1] == ' ') i--;            // drop trailing spaces

        st[i] = '\0';
        if (*pstation == '-') pstation++;

        if (i > 0)
        {
            lstStation.push_back(st);
        }
    }

    // Remove the "(N站)" annotation from the last station.
    if (!lstStation.empty())
    {
        char szTotal[32];
        sprintf(szTotal, "(%lu站)", static_cast<unsigned long>(lstStation.size()));

        std::string & last = lstStation.back();
        const std::string::size_type pos = last.find(szTotal);
        if (pos != std::string::npos)
        {
            last.resize(pos);
            while (!last.empty() && last[last.length() - 1] == ' ')
                last.erase(last.length() - 1);
        }
    }
}

 

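/*
 * NOTE: everything in this comment is web-page residue (like / favourite /
 * comment counters and a payment dialog) copied along with the code from the
 * blog post. It is not part of the program and is commented out so the file
 * compiles.

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值
*/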