从本地服务器和ec2-instance爬取网站有何不同?

我用于通过浏览器爬取图片的代码在本地服务器上运行良好,但在 ec2-instance 上,即使我已检查过所有依赖项,它也无法爬取。下面是同一份代码。

EC2 实例运行的是 Ubuntu。除此之外,其他所有文件在该实例上都运行良好。我猜问题在于它无法打开该 URL 进行爬取。已安装 Chromium 浏览器以及运行代码所需的所有必要模块。请帮我看一下并协助调试。


const puppeteer = require('puppeteer');

const {Pool,Client} = require('pg')
// Postgres connection URL handed to the pg Client below.
// NOTE(review): the credentials are embedded in source; consider loading them
// from the environment or a secret store instead.
const connectionString = "postgres://augli1234:augli1234@kamal1234.c5kamoli1el6.ap-south-1.rds.amazonaws.com:5432/augli";

// Single database client shared by the rest of the script.
const client = new Client({ connectionString });

// Start connecting; the returned promise is not awaited here.
client.connect();

// Running counter of result rows visited (incremented by the query callback).
let count = 0;

// Today's date as "YYYY-M-D" (no zero padding; getMonth() is 0-based, hence +1).
// It is interpolated into the query as the upper bound of the date range.
const today = new Date();
const date = `${today.getFullYear()}-${today.getMonth() + 1}-${today.getDate()}`;
console.log(date);
//select distinct article_id from public.content_paraarticle 
// Maximum number of result rows processed in a single run.
const MAX_ARTICLES_PER_RUN = 17;

// Upper bound for a single page navigation, in milliseconds.
const NAVIGATION_TIMEOUT_MS = 30000;

// Sites this script knows how to scrape: the log line printed on a match,
// the URL pattern, and the CSS selector of the image to capture.
const SITES = [
  {
    label: 'Belongs to Hindu',
    pattern: /thehindu\.com/i,
    imageSelector: 'body > div.container-main > div.jscroll > div > div > div > section > div > div > div > div:nth-child(2) > div.lead-img-cont > div > picture > img',
  },
  {
    label: 'Belongs to livemint',
    pattern: /livemint/i,
    imageSelector: 'figure > img',
  },
];

/**
 * Opens `url` in a freshly launched browser and saves a screenshot of the
 * first element matching `imageSelector` to `outputPath`.
 *
 * The browser is closed in a `finally` block, so a failed navigation or a
 * missing element no longer leaves a Chromium process behind.
 *
 * NOTE(review): the livemint branch of the original was truncated; the
 * viewport height (926) and `omitBackground: true` are assumed to match the
 * thehindu branch — confirm.
 *
 * @param {string} url - Article URL to open.
 * @param {string} imageSelector - CSS selector of the image element.
 * @param {string} outputPath - File the screenshot is written to.
 * @returns {Promise<void>}
 * @throws {Error} When launch, navigation, element lookup or screenshot fails.
 */
async function screenshotLeadImage(url, imageSelector, outputPath) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 926 });

    console.log('..........taken');
    // The original wrapped an already-awaited goto in Promise.race, so its
    // 30 s timer never limited anything; use goto's own timeout instead.
    const response = await page.goto(url, {
      waitUntil: 'networkidle2',
      timeout: NAVIGATION_TIMEOUT_MS,
    });
    console.log('.........................');

    if (!response) {
      throw new Error('Navigation returned no response for ' + url);
    }
    console.log(response.status());

    const image = await page.$(imageSelector);
    if (!image) {
      throw new Error('No element matches selector: ' + imageSelector);
    }
    console.log('screenshot started to get taken');

    await image.screenshot({ path: outputPath, omitBackground: true });

    console.log('screenshot taken');
  } finally {
    await browser.close();
  }
}

/**
 * Appends the screenshot file name of a failed article to test.txt so it can
 * be retried later. Write errors are logged, not thrown.
 *
 * @param {string} fileName - Screenshot file name that could not be produced.
 */
function recordFailure(fileName) {
  // Required locally because the original script used `fs` without importing it.
  const fs = require('fs');
  fs.appendFile("test.txt", "\n" + fileName + "\n", function (writeError) {
    if (writeError) {
      return console.log(writeError);
    }
    console.log("The file was saved!");
  });
}

// Fetch every (url, article_id) pair whose main article has no image yet and
// screenshot the lead image of each supported article, one at a time.
client.query(`SELECT DISTINCT url,article_id FROM public.content_paraarticle where article_id IN(Select id from public.content_mainarticle where image_url='' AND date BETWEEN '2019-10-01' AND '${date}')`, async (err, res) => {
  if (err) throw err;

  for (const row of res.rows) {
    count++;
    console.log(count);
    // Stop after the per-run cap. The original called process.abort() here,
    // which aborts the process and never reaches client.end().
    if (count > MAX_ARTICLES_PER_RUN) {
      break;
    }

    const fileName = `${row.article_id}.png`;
    console.log(fileName);
    const url = row.url;

    const site = SITES.find((candidate) => candidate.pattern.test(url));
    if (!site) {
      console.log("Doesn't belongs to thehindu or livemint");
      continue;
    }

    console.log(site.label);
    console.log(url);
    try {
      await screenshotLeadImage(url, site.imageSelector, fileName);
    } catch (captureError) {
      // Print the real reason (the original logged the literal "err.message"),
      // so launch or navigation failures are visible in the output.
      console.log(captureError.message);
      recordFailure(fileName);
    }
  }

  client.end();
});

/**
 * Logs a promise rejection that nothing handled, together with the promise
 * it belongs to.
 *
 * @param {*} reason - Value the promise was rejected with.
 * @param {Promise} promise - The rejected promise.
 */
function logUnhandledRejection(reason, promise) {
  // Application-specific logging, rethrowing, or other handling could go here.
  console.log('Unhandled Rejection at: Promise', promise, 'reason:', reason);
}

process.on('unhandledRejection', logUnhandledRejection);
zhoujiang1984 回答:从本地服务器和ec2-instance爬取网站有何不同?

暂时没有好的解决方案,如果你有好的解决方案,请发邮件至:iooj@foxmail.com
本文链接:https://www.f2er.com/3166718.html

大家都在问