0

我有一个网站,最初是用 PHP 编写的。每次用户在网站上执行特定查询时,它都会向另一个网站发出 HTTP POST 请求。

/**
 * Send an HTTP POST request over a plain socket and return the raw response.
 *
 * The response body is returned as received; chunked transfer encoding is
 * not decoded here.
 *
 * @param string $url     Absolute http:// URL to post to.
 * @param array  $data    Form fields; encoded with http_build_query().
 * @param string $referer Optional Referer header value; omitted when empty.
 *
 * @return array On success: 'status' => 'ok', 'header' => response headers,
 *               'content' => response body. When the connection cannot be
 *               opened: 'status' => 'err', 'error' => "message (errno)".
 */
function post_request($url, $data, $referer='') {
    $data = http_build_query($data);
    $url = parse_url($url);

    if (!isset($url['scheme']) || $url['scheme'] != 'http') {
        die('Error: Only HTTP request are supported !');
    }

    // extract host, port and path (falling back to the defaults the
    // original code assumed: port 80, and "/" when the URL has no path)
    $host = $url['host'];
    $port = isset($url['port']) ? $url['port'] : 80;
    $path = isset($url['path']) ? $url['path'] : '/';

    // keep the query string, which parse_url() splits off from the path
    if (isset($url['query'])) {
        $path .= '?' . $url['query'];
    }

    // open a socket connection - connect timeout: 7 sec
    $fp = fsockopen($host, $port, $errno, $errstr, 7);

    if (!$fp) {
        return array(
            'status' => 'err',
            'error' => "$errstr ($errno)"
        );
    }

    // Stay in blocking mode. In non-blocking mode fgets() returns
    // immediately when no data is available, so a while(!feof()) loop
    // spins at 100% CPU until the remote side answers. A read timeout
    // bounds the wait instead.
    stream_set_timeout($fp, 7);

    // a non-default port must be part of the Host header
    $hostHeader = ($port == 80) ? $host : "$host:$port";

    // send the request headers:
    fputs($fp, "POST $path HTTP/1.1\r\n");
    fputs($fp, "Host: $hostHeader\r\n");

    if ($referer != '') {
        fputs($fp, "Referer: $referer\r\n");
    }

    fputs($fp, "User-Agent: Mozilla/5.0 Firefox/3.6.12\r\n");
    fputs($fp, "Content-type: application/x-www-form-urlencoded\r\n");
    fputs($fp, "Content-length: ". strlen($data) ."\r\n");
    fputs($fp, "Connection: close\r\n\r\n");
    fputs($fp, $data);

    // receive the whole response; the server closes the connection
    // because of "Connection: close"
    $result = stream_get_contents($fp);

    // close the socket connection:
    fclose($fp);

    if ($result === false) {
        $result = '';
    }

    // split the result header from the content
    $result = explode("\r\n\r\n", $result, 2);

    $header = isset($result[0]) ? $result[0] : '';
    $content = isset($result[1]) ? $result[1] : '';

    // return as structured array:
    return array(
        'status' => 'ok',
        'header' => $header,
        'content' => $content
    );
}

这种方法没有问题,唯一的问题是使用上述代码需要近 3 个 CPU 才能支持 100 个并发用户。

我认为 Node.js 会是一个更好的方案(网络请求是异步的),于是改写成了下面的代码。CPU 占用方面有明显的改善(大多数情况下只使用一个 CPU,最多 2 个)。

/**
 * POST a form-encoded body to www.somehost.com and pass the reply to a callback.
 *
 * @param {string} postPath - Request path on the remote host.
 * @param {string} postData - Already url-encoded request body.
 * @param {string} postReferal - Value for the Referer header; omitted when empty.
 * @param {function(string, Object)} onReply - Called as onReply(reply, out)
 *     once the whole response body has been received.
 * @param {Object} out - Passed through to onReply. On failure this function
 *     calls out.writeHead(500, ...) and out.end('Error') on it.
 */
function postPage(postPath, postData, postReferal, onReply, out) {
    var headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        // Content-Length is a byte count; String#length counts UTF-16 code
        // units and is too small whenever the body has non-ASCII characters.
        'Content-Length': Buffer.byteLength(postData),
        'User-Agent': 'Mozilla/5.0 Firefox/3.6.12',
        'Connection': 'close'
    };

    // Only send a Referer when one was supplied.
    if (postReferal) {
        headers['Referer'] = postReferal;
    }

    var post_options = {
        host: 'www.somehost.com',
        port: '80',
        path: postPath,
        method: 'POST',
        headers: headers
    };

    // Guards against answering `out` twice (e.g. an error after 'end').
    var finished = false;

    function fail(err) {
        if (finished) {
            return;
        }
        finished = true;
        console.error('postPage: request failed:', err);
        out.writeHead(500, { 'Content-Type': 'text/html' });
        out.end('Error');
    }

    // create request
    var post_req = http.request(post_options, function (res) {
        var reply = '';
        res.setEncoding('utf8');
        res.on('data', function (chunk) {
            reply += chunk;
        });

        res.on('end', function () {
            if (finished) {
                return;
            }
            finished = true;
            onReply(reply, out);
        });

        res.on('error', fail);
    });

    // Connection-level failures (DNS, refused, reset, ...) are emitted on the
    // request object, not on the response. Without this listener they are
    // unhandled 'error' events and the client never receives a reply.
    post_req.on('error', fail);

    // post the data
    post_req.write(postData);
    post_req.end();
}

这种做法的问题是它非常不稳定,大约 20% 的 Web 请求会失败。用户重新查询一次通常就能成功,但这样的体验并不好。

我正在使用 Windows Azure 网站来托管上述两种解决方案。

现在,问题

  1. PHP 本来就会占用这么多资源,还是因为我的代码不够优化?
  2. 我的节点代码(或 Azure)有什么问题,导致这么多请求失败?
4

1 回答 1

1

使用请求库

缓冲整个响应

最基本的方法是发出请求,将来自远程服务 (indianrail.gov.in) 的整个响应缓冲到内存中,然后将其发送回客户端。然而,值得看看下面的流式传输示例

安装所需的依赖项 npm install request eyespect

// Buffered example: POST to the remote service, wait for the entire response
// body to be collected in memory, then log the status code and body.
var request = require('request');
var inspect = require('eyespect').inspector({maxLength: 99999999});  // nicer console logging
var url = 'http://www.indianrail.gov.in';

var postData = {
  fooKey: 'foo value'
};
var postDataString = JSON.stringify(postData);
var opts = {
  method: 'post',
  body: postDataString // postData must be a string here..request can handle encoding key-value pairs, see documentation for details
};

inspect(postDataString, 'post data body as a string');
inspect(url, 'posting to url');
// opts must be passed along; without it request() falls back to a plain GET
// and the body above is never sent.
request(url, opts, function (err, res, body) {
  if (err) {
    inspect('error posting request');
    console.log(err);
    return;
  }
  var statusCode = res.statusCode;
  inspect(statusCode, 'statusCode from remote service');
  inspect(body,'body from remote service');
});

流式传输

如果您有一个可用的响应流,就可以直接以流的方式传输数据,而无需先将所有内容缓冲到内存中。我猜在您的例子中,这个流就是 out 参数。

要添加一些错误处理,您可以使用 async 模块反复尝试发送 POST 请求,直到请求成功完成或达到最大尝试次数。

npm install request filed temp eyespect async required-keys

var request = require('request');
var inspect = require('eyespect').inspector({maxLength: 99999999});  // nicer console logging
var filed = require('filed');
var temp = require('temp');
var rk = require('required-keys');
var async = require('async');

/**
 * POST data.postData (JSON-encoded) to data.url and stream the response to a
 * temporary .html file on disk.
 *
 * @param {Object} data - Must carry truthy `url` and `postData` entries.
 * @param {function(*=)} cb - Called exactly once: with an error when
 *     validation, the request, or the file stream fails; with no arguments
 *     when the file stream emits 'end'.
 */
function postToService(data, cb) {

  // make sure the required key-value pairs were passed in the data parameter
  var keys = ['url', 'postData'];
  var err = rk.truthySync(data, keys);
  if (err) { return cb(err); }

  var url = data.url;
  var postData = data.postData;
  var postDataString = JSON.stringify(postData);
  var opts = {
    method: 'post',
    body: postDataString // postData must be a string here..request can handle encoding key-value pairs, see documentation for details
  };

  // Both streams can fail, and a failure may be followed by further events,
  // so make sure the caller's callback runs only once.
  var called = false;
  function done(err) {
    if (called) { return; }
    called = true;
    cb(err);
  }

  var filePath = temp.path({suffix: '.html'});
  // open a writable stream to a file on disk. You could however replace this with any writeable stream such as "out" in your example
  var file = filed(filePath);

  // opts must be passed along; without it request() issues a plain GET and
  // the post body is never sent.
  var req = request(url, opts);

  // pipe() returns the destination stream, so listeners attached to its
  // return value never see errors from the request itself. Listen on both.
  req.on('error', function (err) {
    inspect(err, 'error posting request');
    done(err);
  });

  file.on('error', function (err) {
    inspect(err, 'error streaming response to file on disk');
    done(err);
  });

  file.on('end', function () {
    done();
  });

  // stream the response to disk just as an example
  req.pipe(file);
}

/**
 * Call postToService repeatedly until one attempt is counted as successful
 * or 50 attempts have been made.
 *
 * The first two attempts are always counted as failures, to simulate an
 * unreliable remote service.
 *
 * @param {function(string=)} callback - Called with no arguments on success,
 *     or with an error message once the attempt limit is reached.
 */
function keepPostingUntilSuccess(callback) {
  var payload = {
    url: 'http://www.google.com',
    postData: {
      fooKey: 'foo value'
    }
  };
  var attemptLimit = 50;
  var attemptsMade = 0;
  var succeeded = false;

  // Loop test: stop after a success or when the attempt budget is used up.
  function shouldStop() {
    return succeeded || attemptsMade >= attemptLimit;
  }

  // One iteration: post once and record whether it counted as a success.
  function tryOnce(next) {
    attemptsMade += 1;
    inspect(attemptsMade, 'posting to remote service, attempt number');
    postToService(payload, function (postErr) {
      var failure = postErr;

      // simulate the request failing 3 times, then completing correctly
      if (attemptsMade < 3) {
        failure = 'desired number of attempts not yet reached';
      }
      if (!failure) {
        succeeded = true;
      }
      next();
    });
  }

  // Runs once the loop has stopped; reports the overall outcome.
  function finish(loopErr) {
    inspect(succeeded, 'done with posting, did we complete successfully?');
    if (!succeeded) {
      callback('failed to post data, maximum number of attempts reached');
      return;
    }
    return callback();
  }

  async.until(shouldStop, tryOnce, finish);
}


// Kick off the retry loop and log the final outcome.
keepPostingUntilSuccess(function (postingError) {
  if (!postingError) {
    inspect('posted data successfully');
    return;
  }
  inspect(postingError, 'error posting data');
});
于 2013-02-06T07:23:15.353 回答