这里只爬取前10页的内容:
(* Listing-page URLs of the author's published articles. The pn query
   parameter is stepped by 7 from 0 to 602; presumably one listing page
   holds 7 entries (confirm against the site). *)
urlss = Map[
   "http://jingyan.baidu.com/user/npublic/?uid=d1b612bceb0dc22ba8ffe137&pn=" <> ToString[#] &,
   Range[0, 602, 7]];

(* Download the first 10 listing pages exactly once. Both extractions below
   work on these cached bodies, which halves the number of HTTP requests and
   guarantees that links and titles come from the same page snapshot. *)
pageBodies = URLRead[#, "Body"] & /@ Take[urlss, 10];

(* Relative article paths: the text between  <a href="/article/  and the
   following  " title=  in every page body. StringCases applied to a list of
   strings returns one result list per string, hence the Flatten. *)
urls = Flatten[
   StringCases[pageBodies,
    "<a href=\"/article/" ~~ Shortest[x__] ~~ "\" title=" :> x]];

(* Absolute article URLs. Drop[..., {2, -1, 2}] removes every second match and
   keeps the odd-positioned ones; presumably each article is matched twice
   per page - verify against the page markup. *)
网址 = ("https://jingyan.baidu.com/article/" <> # &) /@
   Drop[urls, {2, -1, 2}];

(* Article titles: the text of the title attribute that directly follows an
   href ending in .html. *)
标题 = Flatten[
   StringCases[pageBodies,
    ".html\" title=\"" ~~ Shortest[x__] ~~ "\"" :> x]];

(* Two-column table of title and URL. Transpose requires both lists to have
   the same length. *)
Grid[Transpose[{标题, 网址}], Frame -> {All, False}, Alignment -> Right]
运行结果如下: