mirror of
https://gitee.com/zlgopen/awtk.git
synced 2024-11-29 18:48:09 +08:00
update tools
This commit is contained in:
parent
9dd2ab1302
commit
6e8c9ba39c
@ -30,10 +30,11 @@ var getChunks = function (wordpos, pos, text) {
|
|||||||
// debug(words);
|
// debug(words);
|
||||||
// throw new Error();
|
// throw new Error();
|
||||||
var ret = [];
|
var ret = [];
|
||||||
getChunksCallsNr++;
|
if(getChunksCallsNr > 150) {
|
||||||
if(getChunksCallsNr > 100) {
|
|
||||||
throw "get Chunks error";
|
throw "get Chunks error";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getChunksCallsNr++;
|
||||||
for (var i = 0; i < words.length; i++) {
|
for (var i = 0; i < words.length; i++) {
|
||||||
var word = words[i];
|
var word = words[i];
|
||||||
//debug(word);
|
//debug(word);
|
||||||
|
@ -6,9 +6,10 @@ const Segment = require('segment');
|
|||||||
let allWords = {};
|
let allWords = {};
|
||||||
let doneURLS = {};
|
let doneURLS = {};
|
||||||
let maxURLS = 10000;
|
let maxURLS = 10000;
|
||||||
|
let errorPages = 0;
|
||||||
let reservedPages = maxURLS;
|
let reservedPages = maxURLS;
|
||||||
const maxWordsPerChar = 15;
|
const maxWordsPerChar = 15;
|
||||||
let rootURL = ['http://blog.sina.com.cn/', 'https://blog.csdn.net/'];
|
let rootURL = ['https://www.qisuu.la/du/', 'http://blog.sina.com.cn/', 'https://blog.csdn.net/'];
|
||||||
|
|
||||||
function isValidURL(url) {
|
function isValidURL(url) {
|
||||||
if (url.indexOf('javascript:') >= 0 || url.indexOf('css') >= 0 || url.indexOf(':') > 8) {
|
if (url.indexOf('javascript:') >= 0 || url.indexOf('css') >= 0 || url.indexOf(':') > 8) {
|
||||||
@ -119,7 +120,7 @@ function tidyResult() {
|
|||||||
|
|
||||||
function outputAndQuit() {
|
function outputAndQuit() {
|
||||||
outputWords(tidyResult());
|
outputWords(tidyResult());
|
||||||
|
console.log(`errorPages=${errorPages}`);
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,6 +189,7 @@ function addWords(text) {
|
|||||||
});
|
});
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log(e);
|
console.log(e);
|
||||||
|
errorPages++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,4 +232,4 @@ var c = new Crawler({
|
|||||||
callback: onTaskDone
|
callback: onTaskDone
|
||||||
});
|
});
|
||||||
|
|
||||||
c.queue(rootURL);
|
c.queue(rootURL);
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user