我为此开发了一个小工具,代码如下。更高级的版本使用代理来拉取外部的非 CORS 资源,因为 SVG 的 foreignObject
会阻止加载任何非 CORS 的跨域请求。我编写了一个在 RunKit 上运行的简单代理,见底部。
这种做法的限制是:不支持外部的非 CORS 字体,也不支持非 CORS 图像。任何想帮助改进这一点(包括添加对图像和字体的支持)的人,都可以在这里贡献代码:https://github.com/dosyago-coder-0/dompeg.js/blob/master/dompeg.js
网页脚本:
(async function (){
// Renders the current page to a JPEG: clone the DOM, inline every stylesheet
// and inline style into one <style> element, wrap the markup in an SVG
// <foreignObject>, draw that SVG onto a canvas and offer the result as a
// download link.

// Use the full scrollable size so the capture covers the whole page.
const width = document.scrollingElement.scrollWidth;
const height = document.scrollingElement.scrollHeight;

// Work on a detached copy so the live page is never modified.
const doc = document.implementation.createHTMLDocument('');
doc.write(document.documentElement.outerHTML);
// The serialized markup must carry the XHTML namespace to be valid inside <foreignObject>.
doc.documentElement.setAttribute('xmlns', doc.documentElement.namespaceURI);

// Collect the text of every stylesheet rule.
const styles = [];
for (const ss of Array.from(document.styleSheets)) {
  let rules = null;
  try {
    rules = ss.cssRules;
  } catch (e) {
    // Reading cssRules of a cross-origin stylesheet throws a SecurityError
    // (older browsers returned null instead); fall through to fetching it.
  }
  if (rules) {
    for (const rule of Array.from(rules)) {
      styles.push(rule.cssText);
    }
    continue;
  }
  if (!ss.href) {
    // Nothing to fetch for a sheet that has neither readable rules nor a URL.
    continue;
  }
  try {
    const res = await fetch(ss.href);
    if (!res.ok) {
      // Do not inject an HTTP error page into the collected CSS.
      throw new Error(`HTTP ${res.status}`);
    }
    styles.push(await res.text());
  } catch (e) {
    // Optional fallback once you have set up your proxy server: retry through it here, e.g.
    //   const res = await fetch(`https://${YOUR PROXY SERVER}.runkit.sh/?url=${btoa(ss.href)}`);
    //   styles.push(await res.text());
    console.warn(`Exception adding styles from ${ss.href}`, e, e.stack);
  }
}

// Drop elements that cannot (or must not) take effect inside the SVG image.
Array.from(doc.querySelectorAll('noscript, link, script')).forEach(el => el.remove());
stripComments(doc);

// Mirror each inline style attribute as a uniquely named class rule.
Array.from(doc.querySelectorAll('*[style]')).forEach(el => {
  const styleText = el.getAttribute('style');
  const uniq = (Math.random() + '' + performance.now()).replace(/\./g, 'x');
  const className = `class${uniq}`;
  styles.push(`.${className} {${styleText}}`);
  el.classList.add(className);
});

const styleElement = doc.createElement('style');
// textContent keeps the CSS as plain text; innerText would turn newlines into <br> elements.
styleElement.textContent = styles.join('\n');
doc.documentElement.appendChild(styleElement);

const canvas = document.createElement('canvas');
Object.assign(canvas, { width, height });
const ctx = canvas.getContext('2d');

// Serialize the root element directly instead of slicing a fixed-length
// doctype prefix off the serialized document.
const data = `
<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}">
<foreignObject width="100%" height="100%">
${(new XMLSerializer).serializeToString(doc.documentElement)}
</foreignObject>
</svg>`;

const img = new Image();
Object.assign(img, { width, height });
img.crossOrigin = "Anonymous";
img.onload = function() {
  // Paint a white background first: JPEG has no alpha channel.
  ctx.fillStyle = 'white';
  ctx.fillRect(0, 0, canvas.width, canvas.height);
  ctx.drawImage(img, 0, 0);
  const datauri = canvas.toDataURL('image/jpeg');

  // Offer the result as a download link pinned to the top-left corner.
  const anchor = document.createElement('a');
  anchor.download = 'screen.jpg';
  anchor.href = datauri;
  anchor.target = "_new";
  anchor.innerText = 'download screen.jpg';
  // The link removes itself once clicked; the click is kept away from page handlers.
  anchor.addEventListener('click', e => { e.stopPropagation(); anchor.remove(); }, { capture: true });
  document.body.appendChild(anchor);
  Object.assign(anchor.style, {
    position: 'fixed',
    background: 'white',
    fontSize: '18px',
    fontFamily: 'monospace',
    color: 'blue',
    top: 0,
    left: 0,
    zIndex: Number.MAX_SAFE_INTEGER
  });
};
img.onerror = function(e) {
  // Without this handler a failed SVG render would pass silently.
  console.warn('Failed to render the page snapshot as an SVG image', e);
};
img.src = buildSvgImageUrl(data);

// Styling used only when the intermediate image is appended for debugging (see below).
img.style.position = "absolute";
img.style.zIndex = "10000000";
img.style.backgroundColor = "white";
//document.body.appendChild(img);
/**
 * Builds a base64 `data:` URL for an SVG document.
 *
 * The markup is encoded as UTF-8 before base64 encoding. TextEncoder is used
 * instead of the deprecated `unescape(encodeURIComponent(...))` trick, which
 * throws a URIError when the string contains a lone surrogate; TextEncoder
 * substitutes U+FFFD for such code units instead.
 *
 * @param {string} svg - SVG markup.
 * @returns {string} A `data:image/svg+xml;base64,...` URL.
 */
function buildSvgImageUrl(svg) {
  const bytes = new TextEncoder().encode(svg);
  // Convert in chunks: passing a whole page's bytes as call arguments at once
  // can exceed the engine's argument limit.
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode.apply(null, bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  return "data:image/svg+xml;base64," + btoa(binary);
}
/**
 * Removes every node matched by the XPath query `//comment()` from a document.
 *
 * Matches are gathered into a list first and removed only after the iteration
 * has finished, so the result iterator is never advanced after a removal.
 *
 * @param {Document} docNode - Document to evaluate the query on and clean.
 */
function stripComments(docNode){
  const matches = docNode.evaluate('//comment()', docNode, null, XPathResult.ANY_TYPE, null);
  const doomed = [];
  for (let node = matches.iterateNext(); node; node = matches.iterateNext()) {
    doomed.push(node);
  }
  for (const node of doomed) {
    node.remove();
  }
}
}());
RunKit 代理服务器脚本:
const request = require("request");
const rp = require('request-promise');
const {URL} = require('url');
const express = require("@runkit/runkit/express-endpoint/1.0.0");
const b64 = require('base-64');
const bodyParser = require('body-parser');
// Escapes the characters that are significant in HTML text and attribute values.
const escapeHtml = (value) => String(value)
  .replace(/&/g, '&amp;')
  .replace(/</g, '&lt;')
  .replace(/>/g, '&gt;')
  .replace(/"/g, '&quot;')
  .replace(/'/g, '&#39;');

/**
 * Renders the proxy's HTML form, optionally followed by an error message.
 *
 * Both values are HTML-escaped before interpolation: the error text is built
 * from exceptions whose message can echo request input, so inserting it raw
 * would allow markup injection.
 *
 * @param {string} [url] - Value pre-filled in the URL field; when falsy the
 *   favicon placeholder URL is used.
 * @param {*} [err] - Error text shown in red; omitted when falsy.
 * @returns {string} HTML markup.
 */
const page = (url,err) => `
<form method=POST style="
position: fixed;
position: sticky;
display: table;
top: 0px;
z-index:12000000;
background: white;">
<label for=hider99>X</label><input id=hider99 type=checkbox>
<style>
#hider99:checked ~ fieldset {
display: none;
}
</style>
<fieldset><legend>Proxy</legend>
<p>
<input required type=url size=62 name=url placeholder="any url" value="${escapeHtml(url||'https://google.com/favicon.ico')}">
<button style=background:lime>Load</button>
${ !! err ? `<p><span style=color:red>${escapeHtml(err)}</span>` : '' }
</fieldset>
</form>`;
// Create the app from the RunKit express-endpoint helper required above,
// handing it this module's exports.
const app = express(module.exports);
// Register the URL-encoded body parser (extended: false).
// NOTE(review): no POST route is visible in this chunk even though the form in `page` posts — confirm whether one is intended.
app.use(bodyParser.urlencoded({ extended: false }));
// GET /?url=<base64-encoded URL>
// Fetches the decoded URL server-side and relays the body with a permissive
// CORS header, so browser scripts can read resources that are not CORS-enabled.
app.get("/", async (req,res,next) => {
  console.log(req.query.url);
  let url;
  res.type('html');
  res.set('access-control-allow-origin', '*');
  try {
    url = b64.decode(req.query.url);
    // Used purely for validation: throws when the decoded text is not a URL.
    new URL(url);
  } catch(e) {
    res.status(400).end(page('',"not a url"+e));
    return;
  }
  try {
    // encoding: null makes request hand back a Buffer; the default decodes the
    // body as a UTF-8 string, which corrupts images, fonts and other binary data.
    const data = await rp({ url, encoding: null });
    // Pick the content type only once the fetch has succeeded, so the error
    // page below is still served as HTML.
    res.type(type(url));
    res.end(data);
  } catch(e) {
    res.type('html');
    res.status(502).end(page('',""+e));
  }
});
// Catch-all for any other single-segment path: respond with a real 404 status
// (previously the "404" text was sent with the default 200 status).
app.get("/:anything", (req,res,next) => {
  res.status(404);
  res.type('html');
  res.end('404 Not found');
});
/**
 * Derives a file extension from a URL, for use as a response content type hint.
 *
 * Only the last path segment is inspected, so dots in the host name and any
 * query string or fragment are ignored. Falls back to 'html' when that segment
 * has no extension.
 *
 * @param {string} [s=''] - Absolute URL, or a bare path/file name.
 * @returns {string} The extension without the leading dot, or 'html'.
 */
function type(s = '') {
  let pathname;
  try {
    pathname = new URL(s).pathname;
  } catch (e) {
    // Not an absolute URL: treat the input as a path and drop any query/fragment.
    pathname = String(s).split(/[?#]/)[0];
  }
  const lastSegment = pathname.split('/').pop();
  const dotIndex = lastSegment.lastIndexOf('.');
  if (dotIndex === -1 || dotIndex === lastSegment.length - 1) {
    return 'html';
  }
  return lastSegment.slice(dotIndex + 1);
}
// NOTE(review): presumably here so the script's final expression evaluates to undefined and the notebook shows no result — confirm.
void 0;