puppeteer用作ssr时渲染队列的尝试

用 puppeteer 可以实现 ssr,但性能堪忧。在我的虚拟机里,即使是简单的页面也需要 1~2 秒才能渲染出来。如果同时有 n 个并发请求,总的渲染时间大约会变成原来的 n 倍。因此,直接这样用的话,实用性几乎为零。

但是,其他的 ssr 方式又比较麻烦,并且不通用。所以我就考虑,是不是可以对 puppeteer 用作 ssr 的方案进行优化。基本思路如下:

  1. 添加页面缓存。如果页面在缓存之中,并且没有过期,直接用缓存。
  2. 添加任务队列。既然性能不好,那同一时间就只渲染指定数量的页面好了。建立一个请求数组,一个渲染实例数组。所有的请求暂存到请求数组里,所有正在进行的渲染暂存到渲染实例数组里。如果请求数组不为空,并且渲染实例的数量还没有达到上限,就渲染最先进入的请求。
  3. 添加检查定时器。每隔一段时间(下面的代码里是 100 毫秒)检查一次请求数组和渲染实例数组,决定是否开始新的渲染。

下面是代码:

ssr.js:

var puppeteer = require("puppeteer");

// In-memory cache of rendered pages, keyed by URL. Each value is an object
// `{ html, time }`, where `time` is the Date.now() timestamp taken when the
// page was stored; ssr() only serves entries that are less than one hour old.
// Note: this will be cleared whenever the server process stops. If you need
// true persistence, use something like
// Google Cloud Storage (https://firebase.google.com/docs/storage/web/start).
const RENDER_CACHE = new Map();

/**
 * Renders `url` in headless Chrome and returns the serialized HTML.
 *
 * A cached copy is returned (with `ttRenderMs` set to 0) when one exists and
 * is less than one hour old. Otherwise a placeholder is pushed onto `instance`
 * for the duration of the render, so callers can use `instance.length` as the
 * number of renders currently in flight.
 *
 * @param {string} url - Address of the page to render.
 * @param {Array<number>} instance - Array of in-flight render markers; it is
 *   mutated in place (one entry added while rendering, removed afterwards).
 * @returns {Promise<{html: string, ttRenderMs: number}>} The page HTML and the
 *   time spent rendering, in milliseconds.
 * @throws {Error} "page.goto/waitForSelector timed out." when navigation
 *   fails; errors from launching the browser or reading the page content
 *   propagate unchanged.
 */
async function ssr(url, instance) {
  console.log("url", url);
  const cached = RENDER_CACHE.get(url);
  if (cached && Date.now() - cached.time < 3600 * 1000) {
    console.log('cached')
    return { html: cached.html, ttRenderMs: 0 };
  }

  const start = Date.now();
  // Launching Chrome takes time, so reserve the slot right away using the
  // start timestamp as a placeholder.
  instance.push(start);

  let browser;
  let html;
  try {
    browser = await puppeteer.launch({headless: true, args:['--no-sandbox']});
    const page = await browser.newPage();
    try {
      // networkidle0 waits for the network to be idle (no requests for 500ms).
      // The page's JS has likely produced markup by this point, but wait longer
      // if your site lazy loads, etc.
      await page.goto(url, { waitUntil: "networkidle0" });
    } catch (err) {
      console.error(err);
      throw new Error("page.goto/waitForSelector timed out.");
    }
    html = await page.content(); // serialized HTML of page DOM.
  } finally {
    // Always release the slot and close the browser, whichever step failed;
    // otherwise a failed launch would block the queue forever and a failed
    // render would leave a Chrome process behind.
    remove(instance, start);
    if (browser) {
      await browser.close();
    }
  }

  const ttRenderMs = Date.now() - start;
  console.info(`Headless rendered page in: ${ttRenderMs}ms`);

  RENDER_CACHE.set(url, {html, time: Date.now()}); // cache rendered page.

  return { html, ttRenderMs };
}

/**
 * Removes one occurrence of `ins` from `instance`, in place.
 *
 * Only the first matching entry is removed: each render adds exactly one
 * marker, so releasing it must free exactly one slot even if another render
 * happens to hold an identical marker value.
 *
 * @param {Array} instance - Array of in-flight render markers (mutated).
 * @param {*} ins - Marker to remove; compared with strict equality.
 * @returns {void}
 */
function remove(instance, ins) {
  const index = instance.indexOf(ins);
  if (index !== -1) {
    instance.splice(index, 1);
  }
}

server.js:

// Pending requests in arrival order; each entry is the { req, res, next }
// triple pushed by the catch-all route below.
const queue = [];
// Start the polling loop; with n = 2 a new render is only started while fewer
// than 2 entries are in `instance`.
// NOTE(review): `instance` is not declared in this excerpt — presumably an
// array defined elsewhere in server.js; confirm.
checkLoop(2, queue, instance);
// Static files from the current directory are registered before the
// catch-all route.
app.use(express.static("./"));

// Catch-all GET handler: it only enqueues the request and sends nothing
// itself. The response is written later by renderPage(), once checkLoop()
// takes the entry off the queue.
app.get("/*", async (req, res, next) => {
  queue.push({req, res, next});
});

/**
 * Polls the request queue every 100 ms and starts at most one render per tick.
 *
 * On each tick, if a request is waiting and fewer than `n` renders are in
 * flight, the oldest request is handed to renderPage(). The function then
 * re-arms itself, so the loop never stops once started.
 *
 * @param {number} n - Maximum number of concurrent renders.
 * @param {Array} queue - Pending requests, oldest first (shifted from the front).
 * @param {Array} instance - Markers for renders currently in flight.
 * @returns {void}
 */
function checkLoop(n, queue, instance) {
  const tick = () => {
    const hasPending = queue.length > 0;
    const hasFreeSlot = instance.length < n;
    if (hasPending && hasFreeSlot) {
      const oldest = queue.shift();
      renderPage(oldest, instance);
    }
    // Schedule the next check regardless of whether anything was dispatched.
    checkLoop(n, queue, instance);
  };
  setTimeout(tick, 100);
}

/**
 * Renders the page for one queued request and writes the HTTP response.
 *
 * On success the prerendered HTML is sent with status 200 and a Server-Timing
 * header. If rendering fails, the error is logged and a 500 response is sent,
 * so the client is not left waiting and the returned promise does not reject.
 *
 * @param {{req: object, res: object}} request - Queue entry holding the
 *   Express request and response objects.
 * @param {Array} instance - Markers for renders in flight, passed through to ssr().
 * @returns {Promise<*>} Whatever `res.send()` returns.
 */
async function renderPage(request, instance) {
  let rendered;
  try {
    rendered = await ssr(
      // Address of the app in the author's VM; replace it with your own, e.g.
      // `${request.req.protocol}://${request.req.host + request.req.url}`
      `${request.req.protocol}://shici.tony93-dev.top/`,
      instance
    );
  } catch (err) {
    console.error(err);
    return request.res.status(500).send("Server-side rendering failed.");
  }

  const { html, ttRenderMs } = rendered;
  // Add Server-Timing! See https://w3c.github.io/server-timing/.
  request.res.set(
    "Server-Timing",
    `Prerender;dur=${ttRenderMs};desc="Headless render time (ms)"`
  );
  return request.res.status(200).send(html); // Serve prerendered page as response.
}

// Listen on port 8080 and log a message once the server is up.
app.listen(8080, () => console.log("Server started. Press Ctrl+C to quit"));