0

I want to get HTML from a source (a link, a file, etc.) and extract values from it. The HTML has the following format:

<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>

First of all, I used cheerio. Following the docs, I wrote:

// Load the cheerio HTML parsing library.
const cheerio = require('cheerio');
// Parse the markup. NOTE: `myhtml` is not defined in this snippet — presumably it
// holds the HTML document shown above as a string; confirm how it is obtained.
const $ = cheerio.load(myhtml);
// Select the children of every <div> nested under the element with id "serp".
const content = $('#serp div').children();
console.log(content); // null (the output reported by the asker)

Following the same procedure, I also tried x-ray and jsdom, but all of them print null.

4

1 回答 1

0

我做了以下事情:

// Sample document held as a string: a <section id="serp"> wrapping a single <div>
// that contains four <article> elements (a, b, c, d).
let myhtml = `<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>`;

// Load the cheerio library and parse the sample markup into a queryable document.
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
// Collect the children of the <div> nested under #serp.
const content = $('#serp div').children();
// Dump the whole selection object; the console output shown below lists four
// 'article' tag entries and `length: 4`, i.e. the selection is not null.
console.log(content);
// Calling .html() on the selection prints "html: a" in the output below, i.e. only
// the inner HTML of the first matched <article>, not of all four.
console.log(`html: ${content.html()}`);

它将以下内容输出到控制台:

initialize {
  '0': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: null,
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Circular],
        next: [Object] } },
  '1': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Object],
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Circular],
        next: [Object] } },
  '2': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Object],
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Circular],
        next: [Object] } },
  '3': 
   { type: 'tag',
     name: 'article',
     namespace: 'http://www.w3.org/1999/xhtml',
     attribs: {},
     'x-attribsNamespace': {},
     'x-attribsPrefix': {},
     children: [ [Object] ],
     parent: 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     prev: 
      { type: 'text',
        data: '\n        ',
        parent: [Object],
        prev: [Object],
        next: [Circular] },
     next: 
      { type: 'text',
        data: '\n      ',
        parent: [Object],
        prev: [Circular],
        next: null } },
  options: 
   { withDomLvl1: true,
     normalizeWhitespace: false,
     xml: false,
     decodeEntities: true },
  _root: 
   initialize {
     '0': 
      { type: 'root',
        name: 'root',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: null,
        prev: null,
        next: null },
     options: 
      { withDomLvl1: true,
        normalizeWhitespace: false,
        xml: false,
        decodeEntities: true },
     length: 1,
     _root: [Circular] },
  length: 4,
  prevObject: 
   initialize {
     '0': 
      { type: 'tag',
        name: 'div',
        namespace: 'http://www.w3.org/1999/xhtml',
        attribs: {},
        'x-attribsNamespace': {},
        'x-attribsPrefix': {},
        children: [Object],
        parent: [Object],
        prev: [Object],
        next: [Object] },
     options: 
      { withDomLvl1: true,
        normalizeWhitespace: false,
        xml: false,
        decodeEntities: true },
     _root: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] },
     length: 1,
     prevObject: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] } } }
html: a

Process finished with exit code 0
于 2017-08-25T13:07:23.047 回答