Coder Social home page Coder Social logo

Comments (21)

frtelg avatar frtelg commented on July 28, 2024 6

Could you please create a release for this bugfix?

from nwsapi.

milahu avatar milahu commented on July 28, 2024 1

can you try ... ?

document.querySelectorAll('.some-class .another-class _:-ms-fullscreen')

from nwsapi.

milahu avatar milahu commented on July 28, 2024 1

still a bug in nwsapi 2.2.0

the error is emitted over and over, but the function does not return

'.lmt--web .lmt__side_container--target .lmt__ad_charLimit_container _:-ms-fullscreen,:root .lmt--web .lmt__side_container--target .lmt__ad_charLimit_container' is not a valid selector

easiest fix is to add return ''; in the switch (symbol) default case

// nwsapi.js

  compileSelector =
    function(expression, source, mode, callback, not) {

// ...

      while (selector) {

// debug
console.log(`compileSelector: while (selector): selector = ${selector}`)
// selector oscillates:
selector == ' _:-ms-fullscreen' || '_:-ms-fullscreen'

        // get namespace prefix if present or get first char of selector
        symbol = STD.apimethods.test(selector) ? '|' : selector[0];

// symbol oscillates:
symbol == ' ' || '_'

        switch (symbol) {

// ...


// debug
console.log(`default case: emit: ` + '\'' + selector_string + '\'' + qsInvalid)

          emit('\'' + selector_string + '\'' + qsInvalid);

// quickfix
          //return '';

        // end of switch symbol

// match oscillates:
match == [
  ' _:-ms-fullscreen',
  index: 0,
  input: ' _:-ms-fullscreen',
  groups: undefined
] || [
  ' _:-ms-fullscreen',
  index: 0,
  input: ' _:-ms-fullscreen',
  groups: undefined

        if (!match) {
          emit('\'' + selector_string + '\'' + qsInvalid);
          return '';

        // pop last component
        selector = match.pop();
      // end of while selector
400 lines to reproduce
#!/usr/bin/env node --trace-warnings

const fetchCss = true; // true -> trigger bug
// cache file: ~/.cache/deepl-client-jsdom/https/*.css
// nwsapi.js: default case before 'end of switch symbol'
// emit: '.lmt--web .lmt__side_container--target .lmt__ad_charLimit_container _:-ms-fullscreen,:root .lmt--web .lmt__side_container--target .lmt__ad_charLimit_container' is not a valid selector

var t1script = new Date().getTime();

// parse CLI args
var args = process.argv.slice(2);
//const showDebug = args.includes('--debug');
const showDebug = true;
const showTime = showDebug || args.includes('--time');
args = args.filter(a => a.startsWith('-') == false);
const sourceLang = args[0] || 'en'; // TODO how to auto-detect sourceLang?
const targetLang = args[1] || 'de';
if (showDebug) console.dir({ sourceLang, targetLang });

// debug
const sourceText = 'hello world'

function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));

async function main() {

var t1 = new Date().getTime();
const jsdom = await import('jsdom');
if (showTime) console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to require jsdom`)

const fs = await import('fs');
const path = await import('path');

var t1 = new Date().getTime();
const getPath = (await import('platform-folders')).default.default;
if (showTime) console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to require getPath`)

const cacheDir = getPath('cache') + '/deepl-client-jsdom';
const userAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36";

const targetTextSelector = '.lmt__target_textarea';
const sourceTextSelector = '.lmt__source_textarea';
// sourceTextField: e.querySelector(".lmt__source_textarea"),

// TODO split large inputs in chunks under 5000 chars
// TODO support google translate, to compare translations
// TODO support xml for google translate (and for deepl which breaks xml in rare cases)
// TODO run as daemon -> reduce latency?
// TODO use original interface to fix translations?

function showHelp() {
  const scriptPath = path.relative(process.cwd(), process.argv[1]);
  console.log(`usage:\necho INPUT | node ${scriptPath} [--debug] [SOURCE_LANG] [TARGET_LANG]`);
  console.log(`\nsample:\necho "hello world" | node ${scriptPath} en de`);

const { stdin } = process;
async function getStdin() {
  let result = '';
  if (stdin.isTTY) {
    return result;
  for await (const chunk of stdin) {
    result += chunk;
  return result;

// TODO refactor?
function makeRequest(url, body, headers) {
  const req = {
    href: url,
    response: { headers },
    getHeader: () => undefined, // jsdom: req.getHeader("referer"),
    then: callback => callback(body),
  return req;

// TODO refactor?
class MockFirstRequest {
  constructor(url, cachePath) {
    this.href = url;
    this.cachePath = cachePath;
    this.response = { headers: {} };
  getHeader(key) { return this.response.headers[key]; } // jsdom: req.getHeader("referer"),
  then(callback) {
    let req;
    var res = Promise.resolve();
    res = res.then(() => (req = jsdom.ResourceLoader.prototype.fetch(this.href, options)));
    res = res.then(buffer => buffer.toString());
    res = res.then(body => {
      this.response.headers = req.response.headers;
      if (showDebug) console.dir({ firstRequest: { url: this.href, headers: req.response.headers } });
      const cachePathHead = this.cachePath + '.head.json';
      if (showDebug) console.dir({ cachePathHead });
      fs.writeFileSync(this.cachePath, body, 'utf8');
      fs.writeFileSync(cachePathHead, JSON.stringify(req.response.headers), 'utf8');
      return callback(body);
    return res;

class CustomResourceLoader extends jsdom.ResourceLoader {

  // cache and patch requests
  // a simple version of

  isFirstRequest = true;

  fetch(url, options) {

    // ignore styles
    // workaround for a bug in jsdom
    if (fetchCss == false && url.endsWith('.css')) {
      if (showDebug) console.log(`fetch: ignore ${url}`);
      return Promise.resolve(Buffer.from(''));

    const cachePath = cacheDir + '/' + url.replace(/:/g, '/').replace(/\/{2,}/g, '/');
    //if (showDebug) console.log(`fetch: cachePath = ${JSON.stringify(cachePath)}`);

    // read from cache
    if (fs.existsSync(cachePath)) {
      if (showDebug) console.log(`fetch: use cached ${cachePath}`);
      let body = fs.readFileSync(cachePath, 'utf8');
      // handle first request
      // needs http headers: content-type, set-cookie
      if (showDebug && url.endsWith('.js')) {
        body = body.replace(/dbg ?= ?!1,/g, 'dbg=true,');
      if (this.isFirstRequest) {
        const cachePathHead = cachePath + '.head.json';
        if (showDebug) console.dir({ cachePathHead });
        const headers = JSON.parse(fs.readFileSync(cachePathHead, 'utf8'));
        if (showDebug) console.dir({ firstRequest: { url, headers } });
        this.isFirstRequest = false;
        return makeRequest(url, body, headers);
      return Promise.resolve(Buffer.from(body));

    // write to cache
    fs.mkdirSync(path.dirname(cachePath), { recursive: true });
    if (showDebug) console.log(`fetch: download ${JSON.stringify(url)}`);

    // handle first request
    // needs content-type http header
    if (this.isFirstRequest) {
      this.isFirstRequest = false;
      const req = new MockFirstRequest(url, cachePath);
      return req;

    var res = Promise.resolve();
    res = res.then(() => super.fetch(url, options));
    res = res.then(buffer => buffer.toString());
    res = res.then(body => {
      if (url.endsWith('.js')) {
        // workaround for a bug in deepl
        // deepl will test String(document.querySelectorAll)
        // browser: "function querySelectorAll() { [native code] }"
        // jsdom: "function querySelectorAll(...) { ... JS code ... }"
        // patch persistent (write to cache)
        body = body.replace(
          String.raw`Q=/^[^{]+\{\s*\[native \w/,`,
          `Q = { test: f => typeof f == 'function' },`
        /* TODO patch?$2878b2.js
                function Et(e) {
                  return e && /^function fetch\(\)\s+\{\s+\[native code\]\s+\}$/.test(e.toString());

        // Error: Uncaught [TypeError: performance.getEntriesByName is not a function]
        body = body.replace(
          'performance.getEntriesByName && performance.getEntriesByName(',

      fs.writeFileSync(cachePath, body, 'utf8');
      // patch dynamic (only in debug mode)
      if (url.endsWith('.js') && showDebug) {
        body = body.replace(/dbg ?= ?!1,/g, 'dbg=true,');
      //return body;
      return Buffer.from(body);
    return res;

const resourceLoader = new CustomResourceLoader({
  strictSSL: false,

const virtualConsole = new jsdom.VirtualConsole();

if (showDebug) virtualConsole.sendTo({
  log: (...args) => {
    if (args[0] == '[FeatureManager]') return; // make deepl debug less verbose

const options = {
  resources: resourceLoader,
  runScripts: 'dangerously',
  pretendToBeVisual: true,
  cookieJar: new jsdom.CookieJar(),
  beforeParse: (window) => {
    // mock window.matchMedia
    Object.defineProperty(window, 'matchMedia', {
      writable: true,
      value: query => ({
        matches: false,
        media: query,
        onchange: null,
        addListener: () => false, // Deprecated
        removeListener: () => false, // Deprecated
        addEventListener: () => false,
        removeEventListener: () => false,
        dispatchEvent: () => false,

const cookieRoundSeconds = 60 * 60 * 24; // round down to start of the day
function setCookieDone(error, cookie) {
  if (error) console.log(`set cookie error: ${error}`);
  v: '1',
  t: Math.floor(new Date().getTime() / 1000 / cookieRoundSeconds) * cookieRoundSeconds,
  m: 'STRICT',
}))}`, '', setCookieDone);

//   / -> \/
//   \ -> \\
// TODO escape more? pipe is sentence delimiter
                  var r = decodeURIComponent(n)
                      .replace(/[\\]./g, function (e) {
                        return "\\\\" === e ? "{_BACKSLASH_}" : e;
                      .replace(/([^\\])[/]/g, "$1{_DELIM_PART_}")
                      .replace(/([^\\])[|]/g, "$1{_DELIM_SENTENCE_}")
                      .replace(/\\([/|])/g, "$1")
                      .replace(/{_BACKSLASH_}/g, "\\"),
function deeplBackslashEncode(str) {
  let res = '';
  for (let i = 0; i < str.length; i++) {
    const char16bit = str[i];
    const code = char16bit.charCodeAt(0);
    res += (
      (code == 47) ? '\\/' : // forward slash
      (code == 92) ? '\\\\' : // backslash
  return res;

async function waitFor(parentNode, selector, timeoutMs) {
  const stepMs = 100;
  if (!timeoutMs) timeoutMs = 100 * stepMs;
  for (let round = 0; round < Math.ceil(timeoutMs/stepMs); round++) {
    const e = parentNode.querySelector(selector);
    if (e) return e;
    if (showDebug) console.log('waiting for ' + selector);
    await sleep(stepMs);
  if (showDebug) console.log('error: timeout waiting for ' + selector);
  return null; // timeout

async function clickElement(selector, sleepMs) {
  if (showDebug) console.log(`clickElement: selector ${selector}`);
  var elem = await waitFor(dom.window.document, selector);
  var event = new dom.window.Event('click', { bubbles: true, cancelable: true });
  if (sleepMs) await sleep(sleepMs);

async function setTextarea(selector, text) {
  if (showDebug) console.log('setTextarea: selector ' + selector);
  const textarea = await waitFor(dom.window.document, selector);
  textarea.value = text;
  var event = new dom.window.Event('paste', { bubbles: true, cancelable: true });
  event.clipboardData = { getData: () => text };

  var t1 = new Date().getTime();
  //const sourceText = await getStdin(); // debug
  if (showTime) console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to read stdin`)

  if (sourceText.trim().length == 0) showHelp();
  if (showDebug) console.log(`stdin = ${JSON.stringify(sourceText)}`);
  if (showDebug) console.log(`stdin.length = ${sourceText.length}`);

  const url = `${sourceLang}/${targetLang}/${encodeURIComponent(deeplBackslashEncode(sourceText))}`;
  if (showDebug) console.dir({ url });

  if (showDebug) console.log(`fetch: cacheDir = ${cacheDir}`);

  var t1 = new Date().getTime();
  const dom = await jsdom.JSDOM.fromURL(url, options);
  if (showTime) console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to load jsdom.fromURL`)

  if (!dom) {
    console.log('error: failed to load dom');
    return; // let jsdom run, TODO timeout 1 sec -> exit?

  if (showDebug) console.log(dom.window.document.cookie.split('; ').map(c => `received cookie: ${c}`).join('\n'));

  function waitForDocument() {
    return new Promise(resolve => {
      dom.window.addEventListener("load", () => resolve());
  var t1 = new Date().getTime();
  await waitForDocument();
  if (showTime) console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to load document`)

  var t1 = new Date().getTime();
  if (showDebug) console.log(`get target text`);
  const stepMs = 100;
  const timeoutMs = 100 * stepMs;
  const targetTextElement = await waitFor(dom.window.document, targetTextSelector);
  let targetText;
  for (let round = 0; round < Math.ceil(timeoutMs/stepMs); round++) {
    targetText = targetTextElement.value.endsWith('\r\n')
      ? targetTextElement.value.slice(0, -2)
      : targetTextElement.value
    if (showDebug) console.dir({ time: round * stepMs / 1000, targetText });
    if (targetText.trim().length > 0) {
      break; // found
    await sleep(stepMs);
  if (showTime) console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to get targetText`)
  if (showTime) console.log(`time sum: ${(new Date().getTime() - t1scriptRun) / 1000} sec to get targetText\n`)
  if (showTime) console.log(`time sum: ${(new Date().getTime() - t1script) / 1000} sec to parse script and get targetText\n`)


  process.exit(0); // quickfix. otherwise jsdom keeps running

if (showTime) console.log(`time sum: ${(new Date().getTime() - t1script) / 1000} sec to parse script\n`)
var t1scriptRun = new Date().getTime();


from nwsapi.

Bondarenko199 avatar Bondarenko199 commented on July 28, 2024 1

I have the same issue using testing-library-react. Currently using its methods I run into an issue where this error occurs. A string grows to millions of symbols in this block of code.

case '~': match = selector.match(Patterns.relative); source = 'n=e;while((e=e.previousElementSibling)){' + source + '}e=n;'; break;

from nwsapi.

dperini avatar dperini commented on July 28, 2024 1

we have to distinguish reality from fantasy.
I believe the tools you mention are perfect for such long selectors.
I really hope nwsapi is used with a completely different scope in mind.
You are now talking about selector length, not resolver "source" length.
In this case I would also set a limit on selector length and return an empty Array.
This will increase the speed of the selector engine 10,000 times on these kinds of sites. ;)
Thank you for the heads-up about the infinite recursion; it is now fixed and I will close the issue.

from nwsapi.

dperini avatar dperini commented on July 28, 2024

@milahu thank you for reviewing and finding this bug.
I will be working again on "nwsapi" starting 1st week of June,
I will hopefully be fixing all the pending issues including this one.

Sorry for the delay to everybody but too many things went wrong in 2021.

from nwsapi.

dperini avatar dperini commented on July 28, 2024

I went back through all the issues and I am now fixing them one by one.
However, for this issue, I can't reproduce an infinite recursion loop, maybe the changes fixed it.

By using the suggested selector I get returned an error message like the following in the console:

Uncaught DOMException: Document.querySelectorAll: '_:-ms-fullscreen' is not a valid selector

then the process terminates.

from nwsapi.

dperini avatar dperini commented on July 28, 2024

I tried the suggested line you sent with both 2.2.0 (latest published) and the current repository, but was unable to reproduce. Also, what you said makes sense; I am sure that after the default/break of the inner loop I pop the next selector from the list of fragments.

Could it be that you tested that infinite loop with a previous version of "nwsapi"?
Maybe I am testing/doing something different than you.
Could you post a small example showing the problem ?
Also specify which "nwsapi" version you are using.

Thank you for your time and patience.

from nwsapi.

milahu avatar milahu commented on July 28, 2024

easiest fix is to add return ''; in the switch (symbol) default case

@Bondarenko199 does this help?

from nwsapi.

richard-ling avatar richard-ling commented on July 28, 2024

I agree with the fix proposed by @milahu, it certainly fixed my issue with no side effects.

from nwsapi.

milahu avatar milahu commented on July 28, 2024

fixed in ab9cde1

yay : )

from nwsapi.

dperini avatar dperini commented on July 28, 2024

@milahu sorry for the long delay.
If you can, ensure my minimal variant really works as expected with no regressions.

from nwsapi.

milahu avatar milahu commented on July 28, 2024

all good ^^

i did not test your fix, but LGTM

the problem was that while { switch { break } } did not break the while loop
using goto (break to label) is probably better than return ''; if selector is truthy

edit: worst case, we still get an infinite loop ...

from nwsapi.

milahu avatar milahu commented on July 28, 2024

yepp, this fixes my e2e test in #46 (comment)

short version:

const jsdom = require('jsdom');

test('nwsapi handles invalid selector', async () => {

  const virtualConsole = new jsdom.VirtualConsole();


  const options = {
    runScripts: 'dangerously',

  const html = `
    <!doctype html>
        #e { color: green; }
        body _:-ms-fullscreen { color: red; } /* this will trigger the bug in nwsapi */
      <p id="e">Hello world</p>
        var s = getComputedStyle(document.getElementById('e')); // force parsing of stylesheet
        console.log('color = ' + s.color);

  const dom = new jsdom.JSDOM(html, options);

  function waitForDocument() {
    return new Promise(resolve => {
      dom.window.addEventListener("load", () => resolve());

  var t1 = new Date().getTime();
  await waitForDocument();
  console.log(`time: ${(new Date().getTime() - t1) / 1000} sec to load document`)

to abort early, i added this to nwsapi.js

      while (selector) {

        console.log(`source.length = ${source.length}`);

        if (source.length > 10000) {
          // otherwise source grows to 1GB
          throw `infinite loop? source.length = ${source.length}`;

cannot reproduce with unit test

// __tests__/invalid-selector.js
// npx jest -- __tests__/invalid-selector.js

 * @jest-environment node

const nwsapi = require('../nwsapi');
const { JSDOM } = require('jsdom');

test('handles invalid selector', () => {

  const dom = new JSDOM('<!doctype html><p>Hello world</p>');

  const NW = nwsapi(dom.window);

  var selector = '_:-ms-fullscreen';
  // SyntaxError: unknown pseudo-class selector ':-ms-fullscreen'

  //var selector = '.lmt--web .lmt__side_container--target .lmt__ad_charLimit_container _:-ms-fullscreen';
  // SyntaxError: unknown pseudo-class selector ':-ms-fullscreen'

  var mode = null;
  var callback = (...args) => console.dir(args);

  var testElement = NW.compile(selector, mode, callback);



0195e47 and later throw

SyntaxError: unknown pseudo-class selector ':-ms-fullscreen'

b462258 and earlier throw

SyntaxError: 'undefined' is not a valid selector

from nwsapi.

richard-ling avatar richard-ling commented on July 28, 2024


from nwsapi.

dperini avatar dperini commented on July 28, 2024

@milahu I agree with that strategy; I have never seen a resolver with a size > 1000 bytes.
So a generic limit in size seems a good approach for various reasons, DOS included.
I will run the entire test suite to find out how long the longest lambda we might have is.

from nwsapi.

milahu avatar milahu commented on July 28, 2024

a size limit should be still large enough to handle edge cases

maybe 10MB?

some numbers on a 2.4GHz cpu

1 MiB limit → compileSelector throws after 0.05 seconds = 0.1% of max
10 MiB limit → compileSelector throws after 0.4 seconds = 1% of max
100 MiB limit → compileSelector throws after 4 seconds
512 MiB limit → compileSelector throws after 20 seconds
1024 MiB limit → compileSelector throws after 40 seconds

from nwsapi.

dperini avatar dperini commented on July 28, 2024

the following is the longest source string produced by the full w3c tests and it is 288 bytes long

if((((/^form$/i.test(e.localName)&&!e.noValidate)||(e.willValidate&&!e.formNoValidate))&&!e.checkValidity())||(/^fieldset$/i.test(e.localName)&&s.first(":invalid",e))){n=e;while((e=e.parentElement)){if((/^patternConstraints$/.test(e.getAttribute("id")))){r[++j]=c[k];continue main;}}e=n;}

the above source string when converted to a Resolver() becomes 377 bytes long

function Resolver(c,f,x,r){var e,n,o,j=r.length-1,k=-1;main:while((e=c[++k])){if((((/^form$/i.test(e.localName)&&!e.noValidate)||(e.willValidate&&!e.formNoValidate))&&!e.checkValidity())||(/^fieldset$/i.test(e.localName)&&s.first(":invalid",e))){n=e;while((e=e.parentElement)){if((/^patternConstraints$/.test(e.getAttribute("id")))){r[++j]=c[k];continue main;}}e=n;}}return r;}

So I believe that doubling it might be enough or maybe make it 4 times bigger.
I wouldn't suggest going to more than that (max. 2048) to make it useful.

A selector yielding a 10 KB source string is nonsense; imagine 1 MiB or 10 MiB ....

from nwsapi.

milahu avatar milahu commented on July 28, 2024

So a generic limit in size seems a good approach for various reasons, DOS included.

this also works in the opposite direction:
an attacker can construct a "too long" css selector to crash nwsapi
for example to prevent webscraping with jsdom
web browsers should be error-tolerant

from nwsapi.

dperini avatar dperini commented on July 28, 2024

you surely know the behavior of those scraping bots and hackers better than I do.
However, just to make sure I get your point, can you show me a selector that makes sense and produces a source string of more than 2048 bytes?

from nwsapi.

milahu avatar milahu commented on July 28, 2024
var selector = 'body';
//for (var i = 0; i < 1000; i++) { // 12KB selector // RangeError: Maximum call stack size exceeded
for (var i = 0; i < 820; i++) { // 10KB selector -> source.length = 77017
  selector += ` > .class-${i}`;
console.log(`selector.length = ${selector.length}`);

so it's even easier to crash by stack size

an attacker can construct a "too long" css selector to crash nwsapi
for example to prevent webscraping with jsdom

i have to admit, for scraping hostile sites i would not use jsdom
rather a real browser via selenium / puppeteer / playwright ...

from nwsapi.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.