您可以使用 Request 对象的 redirectChain() 方法来获取重定向链。
const puppeteer = require('puppeteer');

const url = 'http://doodle.google.com/';

// Demo script: navigate to `url`, print the headers of the response that
// page.goto() resolves with, then the headers of every redirect hop that led
// to it, and finally the title of the page the browser ended up on.
(async () => {
  const browser = await puppeteer.launch({
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
    headless: true,
  });

  try {
    // Use the first tab that is already open instead of creating a new one.
    const [page] = await browser.pages();

    // timeout: 0 disables the navigation timeout. When the navigation goes
    // through redirects, goto() resolves with the response of the LAST
    // redirect, not the first one.
    const response = await page.goto(url, { timeout: 0, waitUntil: 'domcontentloaded' });
    if (response === null) {
      // goto() can resolve with null (e.g. about:blank); fail with a clear
      // message instead of a TypeError on the next line.
      throw new Error('Navigation to ' + url + ' produced no response');
    }

    // Headers of the final response of the navigation.
    console.log(response.headers());

    // If the navigation was redirected, print the response headers of each
    // redirected request. for...of walks the array values in order;
    // for...in would iterate string keys instead.
    for (const redirectedRequest of response.request().redirectChain()) {
      console.log(redirectedRequest.response().headers());
      // console.log(redirectedRequest.url()) // => print the URL of this hop
    }

    // By the time goto() has resolved, the page is already at its final
    // location, so there is only one meaningful title to read.
    const finalTitle = await page.title();
    console.log(finalTitle);
  } finally {
    // Always shut Chromium down, otherwise the Node process never exits.
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
,
仔细检查后发现,某些重定向可能是由前端(通过脚本)强制执行的,因此可能无法在标准重定向链中捕获。所以,Edi 的建议对我并不奏效。
为了让它正常工作,我做了以下更改:
- 使用响应事件处理程序
- 等待很长时间(30到45秒),以确保您在重定向后捕获了相关的响应。您可以根据需要调整时间长度。
就我而言,我试图确定是否启用了gzip,因此我需要在最终URL上使用有效的响应对象。这是修改后的代码:
// Page to load, plus the host substring used to decide which response URLs
// are worth inspecting.
const url_str = 'https://www.example.com';
const url_host_str = 'example.com';
/**
 * Reports whether a set of response headers declares gzip content encoding.
 *
 * @param {Object} resp_headers_obj - Response headers keyed by lower-case
 *   header name; only the 'content-encoding' entry is read.
 * @returns {boolean} true when the 'content-encoding' value contains "gzip"
 *   (case-insensitive), false otherwise, including when the header is absent.
 */
const _checkGZIP = function (resp_headers_obj) {
  const contentEncoding = resp_headers_obj['content-encoding'];
  // RegExp.prototype.test already yields a strict boolean.
  return /gzip/i.test(contentEncoding);
};
// Load url_str in headless Chromium and track, via a 'response' listener,
// whether any inspected response was gzip-encoded. A listener is used instead
// of the redirect chain so that responses arriving after script-driven
// (front-end) redirects are seen as well.
(async () => {
  // Navigation timeout for page.goto(), in milliseconds. The name was used
  // without a definition; 30000 is Puppeteer's documented default.
  const PAGE_TIMEOUT_GOTO_MS = 30000;
  // How long to keep listening after DOMContentLoaded so that late responses
  // are still captured. Adjust as needed.
  const RESPONSE_SETTLE_MS = 30000;
  // Accepts http(s) URLs that have no query string and no '.' after the host
  // part, i.e. it skips URLs that carry a file extension or a '?'.
  const CANDIDATE_URL_PATTERN = /^ *https?\:\/\/([^\?\/]+)(\/|)([^\n\r\?\.]+|) *$/i;

  const browser = await puppeteer.launch({ args: ['--no-sandbox'], headless: true });

  try {
    const page = await browser.newPage();

    // Result flag lives outside the handler so it survives across responses.
    let is_gzip_bool = false;

    // Fires for every response the page receives, from the initial URL as
    // well as from whatever URL the page ends up on.
    page.on('response', (response_obj) => {
      // Once gzip has been detected there is nothing more to learn.
      if (is_gzip_bool) {
        return;
      }

      const resp_url_str = response_obj.url();

      // Only inspect responses for the configured host that pass the filter.
      if (CANDIDATE_URL_PATTERN.test(resp_url_str) && resp_url_str.includes(url_host_str)) {
        is_gzip_bool = _checkGZIP(response_obj.headers());
      }
    });

    // go to page
    await page.goto(url_str, { timeout: PAGE_TIMEOUT_GOTO_MS, waitUntil: 'domcontentloaded' });

    // Wait a long while to capture all relevant responses (from both the
    // initial and the final URL). A plain timer replaces page.waitFor(),
    // which is deprecated/removed in current Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, RESPONSE_SETTLE_MS));

    // document your result (is_gzip_bool) here if required
  } finally {
    // close browser even when navigation fails, so Chromium is not leaked
    await browser.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
本文链接:https://www.f2er.com/3164342.html