我正在尝试使用gulp创建一个构建,该构建也利用了crawler。在构建期间,我使用var {promisify} = require("util")
函数以约定的方式使用fs.writeFile
,因此我可以获取一个值。
正在记录数据,但我最终想将数据写入文件!
这是搜寻器设置。
const { src,watch,series,parallel,dest } = require('gulp');
var Crawler = require("crawler");
function iterObj(nestedObj) {
// keep a list of object references to prevent circular references at any level
let objReferences = [];
function rHelper(obj) {
let arr = []
let objValues = Object.values(obj);
// use var to avoid additonal heap overhead of new variable for each iteration
for (var i = 0; i < objValues.length; i++) {
let val = objValues[i];
// check if non null and not a reference we've already seen before (AKA circular reference)
if (val !== null && objReferences.indexOf(val) === -1) {
if (val.src) { arr.push({ src: val.src }) }
if (typeof val === "object" && !Array.isArray(val)) {
// since we're checking this object,add reference to list to make sure it doesn't get
// referenced by any of its children or siblings
objReferences.push(val);
arr.push(...rHelper(val));
}
}
}
return arr;
}
return rHelper(nestedObj);
}
var c = new Crawler({
maxConnections: 10,// This will be called for each crawled page
callback: function(error,res,done) {
if (error) {
console.log(error);
} else {
var $ = res.$;
// $ is Cheerio by default
//a lean implementation of core jQuery designed specifically for the server;
var img = $(`img`);
// add circular references for testing
img[0].attribs.self = img[0].attribs;
img[0].attribs.parent = img[0];
img[0].attribs.top = img;
img[0].next.prev = img[0].prev;
img[0].prev.next = img[0].next;
let data = JSON.stringify(iterObj(img));
console.log("data in crawler ",data);
console.log("typeof img === 'object' && !Array.isArray(img)",typeof img === "object" && !Array.isArray(img))
return data;
}
done();
}
});
module.exports = c
这是我的gulp文件:
var { src,dest,parallel } = require('gulp');
var crawler = require('./src/crawler');
var toJSON = require('gulp-js-to-json');
var fs = require('fs');
const { promisify } = require("util");
const writeFile = promisify(fs.writeFile);
var files = {
getJs: `src/js/**/*.js`,getJSON: `src/json/**/*.json`,toJsPath: `src/js/`,jsonPath: `src/json/`
}
// fetch command line arguments
const arg = (argList => {
let arg = {},a,opt,thisOpt,curOpt;
for (a = 0; a < argList.length; a++) {
thisOpt = argList[a].trim();
opt = thisOpt.replace(/^\-+/,'');
if (opt === thisOpt) {
// argument value
if (curOpt) arg[curOpt] = opt;
curOpt = null;
} else {
// argument name
curOpt = opt;
arg[curOpt] = true;
}
}
return arg;
})(process.argv);
async function writeJS() {
var data = crawlerTask();
await writeFile(`${files.toJsPath}/${arg.name}.js`,data);
}
function crawlerTask() {
return crawler.queue(arg.url)
}
function jsToJSON() {
return src(`${files.toJsPath}/${arg.name}.js`)
.pipe(toJSON({
stringify: {
space: '\t'
}
}))
.pipe(dest(`${arg.name}`))
}
function runCrawler() {
return writeJS().catch(error => console.error(error))
}
exports.runCrawler = runCrawler
但是我回来了
创建的.js
文件的值为undefined
。
有人可以帮忙吗?