mirror of
https://gitee.com/zlgopen/awtk.git
synced 2024-11-29 18:48:09 +08:00
update tools
This commit is contained in:
parent
9dd2ab1302
commit
6e8c9ba39c
@ -30,10 +30,11 @@ var getChunks = function (wordpos, pos, text) {
|
||||
// debug(words);
|
||||
// throw new Error();
|
||||
var ret = [];
|
||||
getChunksCallsNr++;
|
||||
if(getChunksCallsNr > 100) {
|
||||
if(getChunksCallsNr > 150) {
|
||||
throw "get Chunks error";
|
||||
}
|
||||
|
||||
getChunksCallsNr++;
|
||||
for (var i = 0; i < words.length; i++) {
|
||||
var word = words[i];
|
||||
//debug(word);
|
||||
|
@ -6,9 +6,10 @@ const Segment = require('segment');
|
||||
let allWords = {};
|
||||
let doneURLS = {};
|
||||
let maxURLS = 10000;
|
||||
let errorPages = 0;
|
||||
let reservedPages = maxURLS;
|
||||
const maxWordsPerChar = 15;
|
||||
let rootURL = ['http://blog.sina.com.cn/', 'https://blog.csdn.net/'];
|
||||
let rootURL = ['https://www.qisuu.la/du/', 'http://blog.sina.com.cn/', 'https://blog.csdn.net/'];
|
||||
|
||||
function isValidURL(url) {
|
||||
if (url.indexOf('javascript:') >= 0 || url.indexOf('css') >= 0 || url.indexOf(':') > 8) {
|
||||
@ -119,7 +120,7 @@ function tidyResult() {
|
||||
|
||||
function outputAndQuit() {
|
||||
outputWords(tidyResult());
|
||||
|
||||
console.log(`errorPages=${errorPages}`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
@ -188,6 +189,7 @@ function addWords(text) {
|
||||
});
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
errorPages++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -230,4 +232,4 @@ var c = new Crawler({
|
||||
callback: onTaskDone
|
||||
});
|
||||
|
||||
c.queue(rootURL);
|
||||
c.queue(rootURL);
|
||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user