Sub clawData()
Dim companies() As String
Dim rowNum As Long
Dim keywords As String
Dim ff As Long
companyNum = Sheets("Sheet1").Range("g65536").End(xlUp).Row
keywordsGeneral = getGeneralKeyWordsGeneral()
keywordsEnvironment = getGeneralKeyWordsEnvironment()
keywordsSocial = getGeneralKeyWordsSocial()
keywordsGovernance = getGeneralKeyWordsGovernance()
companies() = getCompanyList()
For ff = 0 To companyNum Step 1
If companies()(ff) <> "" Then
keywordsGeneral = clawResult(CStr(keywordsGeneral), "General", CStr(companies()(ff)), ff * 4)
keywordsGeneral = clawResult(CStr(keywordsEnvironment), "Environment", CStr(companies()(ff)), ff * 4 + 1)
keywordsGeneral = clawResult(CStr(keywordsSocial), "Social", CStr(companies()(ff)), ff * 4 + 2)
keywordsGeneral = clawResult(CStr(keywordsGovernance), "Governance", CStr(companies()(ff)), ff * 4 + 3)
End If
Next
Shell ("taskkill /f /im IEXPLORE.exe")
4
End Sub
Function GetChs(strInput As String) As String
Dim regEx As Object
Set regEx = CreateObject("VBSCRIPT.REGEXP")
regEx.Pattern = "[^\u4e00-\u9fa5]"
regEx.IgnoreCase = True
regEx.Global = True
GetChs = regEx.Replace(strInput, "")
Set regEx = Nothing
End Function
Function getGeneralKeyWordsGeneral() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("h65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 8))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGeneral = strs
End Function
Function getGeneralKeyWordsEnvironment() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("i65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 9))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsEnvironment = strs
End Function
Function getGeneralKeyWordsSocial() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("j65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 10))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsSocial = strs
End Function
Function getGeneralKeyWordsGovernance() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("k65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 11))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGovernance = strs
End Function
Function getCompanyList() As String()
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1(100) As String
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("g65536").End(xlUp).Row Step 1
'strT = GetChs(CStr(Sheets("Sheet1").Cells(i, 2)))
strT = CStr(Sheets("Sheet1").Cells(i, 7))
If strT <> "" Then
array1(i - 2) = strT
End If
Next
getCompanyList = array1()
End Function
Function urlVerify(url As String) As Long
Dim result As Long
result = 1
IFind = InStr(url, ".pdf")
IFind2 = InStr(url, ".doc")
IFind3 = InStr(url, ".xls")
IFind4 = InStr(url, ".xlsx")
IFind5 = InStr(url, ".ppt")
If IFind = 0 And IFind2 = 0 And IFind3 = 0 And IFind4 = 0 And IFind5 = 0 Then
result = 0
End If
urlVerify = result
End Function
Function clawResult(keywords As String, keyWordsType As String, companyName As String, companyLine As Long) As String
Dim ie, dmt, tb, i&, j&, a&, strx2 As String, ie2, dmt2, tb2, i2&
For a = 0 To 4
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = False
.navigate "https://www.google.com.hk/search?q=" + keywords + "+%22+" + companyName + "%22&lr=lang_ja&newwindow=1&safe=strict&hl=zh-CN&as_qdr=all&tbs=lr:lang_1ja&ei=1LhIVKeUFc3W7Qb_oIGABQ&start=" + CStr(a) + "0&sa=N&biw=1920&bih=1016" '??§????3???§??3??
Do Until .ReadyState = 4
DoEvents
Loop
Set dmt = .document
If TypeName(dmt) <> "AcroPDF" Then
Set tb = dmt.all.tags("h3")
For i = 0 To tb.Length - 1
strx = Split(tb.Item(i).innerHTML, "href=")
strx2 = Split(strx(1), """")(1)
Cells(companyLine * 50 + a * 10 + 2 + i, 1) = strx2
Cells(companyLine * 50 + a * 10 + 2 + i, 2) = companyName
Cells(companyLine * 50 + a * 10 + 2 + i, 3) = tb.Item(i).innertext
Cells(companyLine * 50 + a * 10 + 2 + i, 4) = keyWordsType
IFind = urlVerify(strx2)
If IFind = 0 Then
Set ie2 = CreateObject("InternetExplorer.Application")
With ie2
.Visible = False
.navigate strx2
Do Until .ReadyState = 4 Or .busy = False
DoEvents
Loop
Set dmt2 = .document
If TypeName(dmt2) <> "AcroPDF" Then
Set tb2 = dmt2.all.tags("p")
For i2 = 0 To tb2.Length - 1
strs2 = strs2 & vbCrLf & tb2.Item(i2).innertext
Next
Cells(companyLine * 50 + a * 10 + 2 + i, 5) = strs2
strs2 = ""
End If
End With
End If
Next
End If
End With
Next
Shell ("taskkill /f /im IEXPLORE.exe")
newHour = Hour(Now())
newMinute = Minute(Now())
newSecond = Second(Now()) + 5
waitTime = TimeSerial(newHour, newMinute, newSecond)
Application.Wait waitTime
clawResult = ""
End Function
Dim companies() As String
Dim rowNum As Long
Dim keywords As String
Dim ff As Long
companyNum = Sheets("Sheet1").Range("g65536").End(xlUp).Row
keywordsGeneral = getGeneralKeyWordsGeneral()
keywordsEnvironment = getGeneralKeyWordsEnvironment()
keywordsSocial = getGeneralKeyWordsSocial()
keywordsGovernance = getGeneralKeyWordsGovernance()
companies() = getCompanyList()
For ff = 0 To companyNum Step 1
If companies()(ff) <> "" Then
keywordsGeneral = clawResult(CStr(keywordsGeneral), "General", CStr(companies()(ff)), ff * 4)
keywordsGeneral = clawResult(CStr(keywordsEnvironment), "Environment", CStr(companies()(ff)), ff * 4 + 1)
keywordsGeneral = clawResult(CStr(keywordsSocial), "Social", CStr(companies()(ff)), ff * 4 + 2)
keywordsGeneral = clawResult(CStr(keywordsGovernance), "Governance", CStr(companies()(ff)), ff * 4 + 3)
End If
Next
Shell ("taskkill /f /im IEXPLORE.exe")
4
End Sub
Function GetChs(strInput As String) As String
Dim regEx As Object
Set regEx = CreateObject("VBSCRIPT.REGEXP")
regEx.Pattern = "[^\u4e00-\u9fa5]"
regEx.IgnoreCase = True
regEx.Global = True
GetChs = regEx.Replace(strInput, "")
Set regEx = Nothing
End Function
Function getGeneralKeyWordsGeneral() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("h65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 8))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGeneral = strs
End Function
Function getGeneralKeyWordsEnvironment() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("i65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 9))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsEnvironment = strs
End Function
Function getGeneralKeyWordsSocial() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("j65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 10))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsSocial = strs
End Function
Function getGeneralKeyWordsGovernance() As String
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1
array1 = Array()
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("k65536").End(xlUp).Row Step 1
strT = CStr(Sheets("Sheet1").Cells(i, 11))
If strT <> "" Then
d.Add i, strT
End If
Next
For ii = 2 To d.Count + 1 Step 1
If ii = d.Count + 1 Then
strs = strs + d.Item(ii)
Else
strs = strs + d.Item(ii) + "+OR+"
End If
Next
getGeneralKeyWordsGovernance = strs
End Function
Function getCompanyList() As String()
Dim d, a, c As Variant
Dim strT
Dim strs As String
Dim array1(100) As String
Set d = CreateObject("Scripting.Dictionary")
For i = 2 To Sheets("Sheet1").Range("g65536").End(xlUp).Row Step 1
'strT = GetChs(CStr(Sheets("Sheet1").Cells(i, 2)))
strT = CStr(Sheets("Sheet1").Cells(i, 7))
If strT <> "" Then
array1(i - 2) = strT
End If
Next
getCompanyList = array1()
End Function
Function urlVerify(url As String) As Long
Dim result As Long
result = 1
IFind = InStr(url, ".pdf")
IFind2 = InStr(url, ".doc")
IFind3 = InStr(url, ".xls")
IFind4 = InStr(url, ".xlsx")
IFind5 = InStr(url, ".ppt")
If IFind = 0 And IFind2 = 0 And IFind3 = 0 And IFind4 = 0 And IFind5 = 0 Then
result = 0
End If
urlVerify = result
End Function
Function clawResult(keywords As String, keyWordsType As String, companyName As String, companyLine As Long) As String
Dim ie, dmt, tb, i&, j&, a&, strx2 As String, ie2, dmt2, tb2, i2&
For a = 0 To 4
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = False
.navigate "https://www.google.com.hk/search?q=" + keywords + "+%22+" + companyName + "%22&lr=lang_ja&newwindow=1&safe=strict&hl=zh-CN&as_qdr=all&tbs=lr:lang_1ja&ei=1LhIVKeUFc3W7Qb_oIGABQ&start=" + CStr(a) + "0&sa=N&biw=1920&bih=1016" '??§????3???§??3??
Do Until .ReadyState = 4
DoEvents
Loop
Set dmt = .document
If TypeName(dmt) <> "AcroPDF" Then
Set tb = dmt.all.tags("h3")
For i = 0 To tb.Length - 1
strx = Split(tb.Item(i).innerHTML, "href=")
strx2 = Split(strx(1), """")(1)
Cells(companyLine * 50 + a * 10 + 2 + i, 1) = strx2
Cells(companyLine * 50 + a * 10 + 2 + i, 2) = companyName
Cells(companyLine * 50 + a * 10 + 2 + i, 3) = tb.Item(i).innertext
Cells(companyLine * 50 + a * 10 + 2 + i, 4) = keyWordsType
IFind = urlVerify(strx2)
If IFind = 0 Then
Set ie2 = CreateObject("InternetExplorer.Application")
With ie2
.Visible = False
.navigate strx2
Do Until .ReadyState = 4 Or .busy = False
DoEvents
Loop
Set dmt2 = .document
If TypeName(dmt2) <> "AcroPDF" Then
Set tb2 = dmt2.all.tags("p")
For i2 = 0 To tb2.Length - 1
strs2 = strs2 & vbCrLf & tb2.Item(i2).innertext
Next
Cells(companyLine * 50 + a * 10 + 2 + i, 5) = strs2
strs2 = ""
End If
End With
End If
Next
End If
End With
Next
Shell ("taskkill /f /im IEXPLORE.exe")
newHour = Hour(Now())
newMinute = Minute(Now())
newSecond = Second(Now()) + 5
waitTime = TimeSerial(newHour, newMinute, newSecond)
Application.Wait waitTime
clawResult = ""
End Function