admin 管理员组

文章数量: 887021

Playwright

我有一个小型的 express API,它在 Instagram 上运行一个获取 ImgURL 和 descriptionText 的抓取工具,我已将其移至 ec2 实例,因为我最初试图在 Vercel 无服务器函数上运行它。该函数可以在 Vercel 上运行,但需要超过 10 秒的时间,这在免费版本上会超时。这就是我尝试使用 ec2 的原因,感谢任何帮助。

const express = require('express');
const playwright = require('playwright-aws-lambda');

// Application instance; every route receives JSON request bodies
// already parsed into `req.body`.
const app = express();
const jsonBodyParser = express.json();
app.use(jsonBodyParser);

/**
 * Returns true when `link` is an absolute http(s) URL whose host is
 * instagram.com or one of its subdomains.
 *
 * A plain substring test would also accept URLs such as
 * `http://internal-host/?x=instagram`, letting callers point the
 * server-side browser at arbitrary hosts.
 *
 * @param {unknown} link - Value taken from the request body.
 * @returns {boolean} Whether the value is an acceptable Instagram URL.
 */
function isInstagramUrl(link) {
  if (typeof link !== 'string') {
    return false;
  }

  let parsed;
  try {
    parsed = new URL(link);
  } catch (error) {
    // Not an absolute URL at all.
    return false;
  }

  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return false;
  }

  const host = parsed.hostname.toLowerCase();
  return host === 'instagram.com' || host.endsWith('.instagram.com');
}

/**
 * POST /scrape-instagram
 *
 * Request body: `{ "link": "<Instagram post URL>" }`.
 * Success:      200 `{ imageUrl, descText }`.
 * Failure:      400 `{ error }` for an invalid link or any scraping error.
 */
app.post('/scrape-instagram', async (req, res) => {
  const link = req.body && req.body.link;

  // Answer invalid input directly. Throwing here would reject the async
  // handler's promise, which Express 4 does not catch: the client would
  // never get a response and the process could exit on the unhandled rejection.
  if (!isInstagramUrl(link)) {
    res.status(400).json({error: 'Not an Instagram link'});
    return;
  }

  let browser = null;

  try {
    // NOTE(review): the "Executable doesn't exist" launch error reported for
    // this service suggests the Chromium build is missing on the host; the
    // error text itself recommends `npx playwright install` — confirm.
    browser = await playwright.launchChromium({headless: true});
    const context = await browser.newContext();

    const page = await context.newPage();
    await page.goto(link, {waitUntil: 'networkidle'});

    // NOTE(review): these look like generated class names and will
    // presumably break whenever Instagram changes its markup — confirm.
    const imgClass = 'img.x5yr21d.xu96u03.x10l6tqk.x13vifvy.x87ps6o.xh8yej3';
    const descClass = 'h1._aacl._aaco._aacu._aacx._aad7._aade';

    const image = await page.waitForSelector(imgClass);
    const imageUrl = await image.getAttribute('src');
    const desc = await page.waitForSelector(descClass);
    const descText = await desc.innerText();

    res.status(200).json({imageUrl, descText});
  } catch (error) {
    console.error(error);
    res.status(400).json({error: error.message});
  } finally {
    // Always release the browser, including when navigation or a selector
    // wait fails; otherwise each failed request leaks a Chromium process.
    if (browser !== null) {
      try {
        await browser.close();
      } catch (closeError) {
        // The response has already been sent; log instead of rethrowing.
        console.error(closeError);
      }
    }
  }
});

// Start accepting HTTP connections on port 3000 and log once the
// server is ready.
const onListening = () => {
  console.log('Server listening on port 3000');
};

app.listen(3000, onListening);

然而,在发起请求时我遇到了这个错误:

{
    "error": "browserType.launch: Executable doesn't exist at /home/ubuntu/.cache/ms-playwright/chromium-1055/chrome-linux/chrome\n╔═════════════════════════════════════════════════════════════════════════╗\n║ Looks like Playwright Test or Playwright was just installed or updated. ║\n║ Please run the following command to download new browsers:              ║\n║                                                                         ║\n║     npx playwright install                                              ║\n║                                                                         ║\n║ <3 Playwright Team                                                      ║\n╚═════════════════════════════════════════════════════════════════════════╝"
}

package.json 中的依赖:

  "dependencies": {
    "express": "^4.18.2",
    "playwright-aws-lambda": "^0.10.0",
    "playwright-core": "^1.32.0"
  }

EC2 实例上的 Node.js 版本为 v16.19.1。

回答如下:

本文标签: Playwright