用 Node.js 抓取影像合集連結


1
2
3
4
5
6
7
8
9
10
11
{
"name": "linker",
"version": "0.1.0",
"dependencies": {
"colors": "0.6.0-1",
"printf": "0.1.2",
"cheerio": "0.12.0",
"request": "2.22.0",
"async": "0.2.9"
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
var cheerio = require('cheerio'),
request = require('request'),
colors = require('colors'),
printf = require('printf'),
async = require('async');

/**
 * Scrape the listing page, then visit every per-video page it links to and
 * print the element with class "file_name"'s href found on each one.
 *
 * Flow (each waterfall step invokes its callback exactly once):
 *   1. Print a banner.
 *   2. Fetch the listing page and collect {number, url} for every link in
 *      the second ".numlist" element.
 *   3. Fetch all collected pages concurrently with async.each and print the
 *      download link, or a warning when the page has no ".file_name" href.
 *
 * A failure fetching the listing page aborts the run and is reported by the
 * final callback. A failure on a single video page is logged and skipped so
 * the remaining videos are still processed.
 */
async.waterfall([
    // Step 1: announce what is about to happen.
    function(callback) {
        console.log(printf("%-2s ==> %-50s\n".green, "##", "Finding all matching videos"));
        callback(null);
    },

    // Step 2: read the listing page and gather every video link.
    function(callback) {
        request("http://dmxz.example.net/xiazai/1493.html", function(error, response, html) {
            if (error) {
                // Hand the error to the waterfall instead of dropping it here.
                return callback(error);
            }

            var $ = cheerio.load(html),
                videos = [];

            $(".numlist").eq(1).find("ul li a").each(function(i, element) {
                var videoNumber = $(this).text(),
                    videoDownloadURL = $(this).attr('href');

                console.log(printf("%-2s ==> %-50s", videoNumber, videoDownloadURL));
                console.log(printf("%-2s ==> %-50s\n".green, videoNumber, "Reading video download link"));

                // Collect instead of calling the waterfall callback per link:
                // a waterfall callback must only be invoked once.
                videos.push({ number: videoNumber, url: videoDownloadURL });
            });

            callback(null, videos);
        });
    },

    // Step 3: resolve the download link of every collected video.
    function(videos, callback) {
        async.each(videos, function(video, done) {
            request(video.url, function(error, response, html) {
                if (error) {
                    // Best effort: report this video and keep going with the rest.
                    console.error(error);
                    return done();
                }

                var $ = cheerio.load(html),
                    downloadURL = $(".file_name").attr("href");

                if (typeof downloadURL === "undefined") {
                    console.log(printf("%-2s ==> %-10s".red, video.number, "Verify code required!"));
                } else {
                    console.log(printf("%-2s ==> %-50s".yellow, video.number, downloadURL));
                }

                done();
            });
        }, callback);
    }
], function(error) {
    // Only the listing-page failure reaches this point; surface it.
    if (error) {
        console.error(error);
    }
});