8

Does anyone know how to screen scrape web-sites that use digest http authentication? I use code like this:

// Build the request and attach the login credentials to it.
var request = (HttpWebRequest)WebRequest.Create(SiteUrl);
request.Credentials = new NetworkCredential(Login, Password);

I'm able to access the site's mainpage, but when I try to surf to any other pages (using another request with the same credentials) I get "HTTP/1.1 400 Bad Request" error.

I used Fiddler to compare requests of my C# application with Mozilla Firefox requests.

The 2 URLs that I try to access are https://mysiteurl/forum/index.php and https://mysiteurl/forum/viewforum.php?f=4&sid=d104363e563968b4e4c07e04f4a15203

Here are the Authorization headers of the 2 requests sent by my C# app:

Authorization: Digest username="xxx",realm="abc",nonce="NXa26+NjBAA=747dfd1776c9d585bd388377ef3160f1ff265429",uri="/forum/index.php",algorithm="MD5",cnonce="89179bf17dd27785aa1c88ad976817c9",nc=00000001,qop="auth",response="3088821620d9cbbf71e775fddbacfb6d"

Authorization: Digest username="xxx",realm="abc",nonce="1h7T6+NjBAA=4fed4d804d0edcb54bf4c2f912246330d96afa76",uri="/forum/viewforum.php",algorithm="MD5",cnonce="bb990b0516a371549401c0289fbacc7c",nc=00000001,qop="auth",response="1ddb95a45fd7ea8dbefd37a2db705e3a"

And this is what Firefox sends to the server:

Authorization: Digest username="xxx", realm="abc", nonce="T9ICNeRjBAA=4fbb28d42db044e182116ac27176e81d067a313c", uri="/forum/", algorithm=MD5, response="33f29dcc5d70b61be18eaddfca9bd601", qop=auth, nc=00000001, cnonce="ab96bbe39d8d776d"
Authorization: Digest username="xxx", realm="abc", nonce="T9ICNeRjBAA=4fbb28d42db044e182116ac27176e81d067a313c", uri="/forum/viewforum.php?f=4&sid=d104363e563968b4e4c07e04f4a15203", algorithm=MD5, response="a996dae9368a79d49f2f29ea7a327cd5", qop=auth, nc=00000002, cnonce="e233ae90908860e1"

So in my app I have different values in "nonce" field while in Firefox this field is the same. On the other hand I have same values in "nc" field while Firefox increments this field.

Also, when my app tries to access the site's pages, I can see in Fiddler that it always gets the response "HTTP/1.1 401 Authorization Required" first, while Firefox is challenged only once. I've tried setting request.PreAuthenticate = true; but it seems to have no effect...

My question is: how to properly implement digest authentication using C#? Are there any standard methods or do I have to do it from scratch? Thanks in advance.

4

3 回答 3

12

创建一个类 Digest.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Security.Cryptography;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;

namespace NUI
{
    /// <summary>
    /// Downloads pages from a server that uses HTTP Digest authentication
    /// (MD5, qop=auth) on top of <see cref="HttpWebRequest"/>.
    /// When a request is answered with 401, the challenge from the
    /// WWW-Authenticate header is parsed, cached on this instance and the
    /// request is repeated with an Authorization header. Later requests reuse
    /// the cached nonce and send an incremented nonce count.
    /// </summary>
    public class DigestAuthFixer
    {
        // Only GET requests are issued by GrabResponse, so HA2 is always computed for GET.
        private const string RequestMethod = "GET";

        // A cached challenge older than this is not reused pre-emptively.
        private static readonly TimeSpan ChallengeLifetime = TimeSpan.FromHours(1);

        // Per-instance state: static fields would make two fixers (different
        // hosts or credentials) overwrite each other's values.
        private readonly string _host;
        private readonly string _user;
        private readonly string _password;
        private string _realm;
        private string _nonce;
        private string _qop;
        private string _opaque;
        private string _cnonce;
        private DateTime _cnonceDate;
        private int _nc;

        /// <summary>Creates a fixer for one host and one set of credentials.</summary>
        /// <param name="host">Prefix that is concatenated with the <c>dir</c> passed to <see cref="GrabResponse"/>, e.g. <c>https://example.org</c>.</param>
        /// <param name="user">User name used in the digest.</param>
        /// <param name="password">Password used in the digest.</param>
        public DigestAuthFixer(string host, string user, string password)
        {
            _host = host;
            _user = user;
            _password = password;
        }

        /// <summary>Returns the lowercase hexadecimal MD5 hash of the ASCII bytes of <paramref name="input"/>.</summary>
        private static string CalculateMd5Hash(string input)
        {
            var inputBytes = Encoding.ASCII.GetBytes(input);
            using (var md5 = MD5.Create())
            {
                return ToHex(md5.ComputeHash(inputBytes));
            }
        }

        /// <summary>Formats bytes as a lowercase hexadecimal string.</summary>
        private static string ToHex(byte[] bytes)
        {
            var sb = new StringBuilder(bytes.Length * 2);
            foreach (var b in bytes)
            {
                sb.Append(b.ToString("x2"));
            }
            return sb.ToString();
        }

        /// <summary>
        /// Looks up one parameter of a WWW-Authenticate header.
        /// Accepts both quoted (<c>qop="auth"</c>) and unquoted (<c>qop=auth</c>) values.
        /// </summary>
        /// <returns>The parameter value without quotes, or <c>null</c> when the parameter is absent.</returns>
        private static string FindHeaderVar(string varName, string header)
        {
            // The leading (?:^|[\s,]) keeps "nonce" from matching inside another
            // parameter name that merely ends with the same letters.
            var pattern = string.Format(
                @"(?:^|[\s,]){0}\s*=\s*(?:""([^""]*)""|([^\s,]+))",
                Regex.Escape(varName));
            var match = Regex.Match(header, pattern, RegexOptions.IgnoreCase);
            if (!match.Success)
            {
                return null;
            }
            return match.Groups[1].Success ? match.Groups[1].Value : match.Groups[2].Value;
        }

        /// <summary>Same as <see cref="FindHeaderVar"/> but a missing parameter is an error.</summary>
        /// <exception cref="ApplicationException">The parameter is not present in the header.</exception>
        private static string GrabHeaderVar(string varName, string header)
        {
            var value = FindHeaderVar(varName, header);
            if (value == null)
            {
                throw new ApplicationException(string.Format("Header {0} not found", varName));
            }
            return value;
        }

        /// <summary>
        /// Picks the quality of protection to answer with. The server may offer a
        /// list such as <c>auth,auth-int</c>; this class only computes the
        /// <c>auth</c> form, so that one is chosen when offered.
        /// </summary>
        private static string SelectQop(string offered)
        {
            foreach (var option in offered.Split(','))
            {
                if (string.Equals(option.Trim(), "auth", StringComparison.OrdinalIgnoreCase))
                {
                    return "auth";
                }
            }
            // Nothing we recognise: fall back to echoing what the server sent.
            return offered;
        }

        /// <summary>Creates a random client nonce as 32 hexadecimal characters.</summary>
        private static string CreateClientNonce()
        {
            var bytes = new byte[16];
            using (var rng = RandomNumberGenerator.Create())
            {
                rng.GetBytes(bytes);
            }
            return ToHex(bytes);
        }

        /// <summary>
        /// Builds the Authorization header value for the cached challenge and
        /// increments the nonce count.
        /// </summary>
        /// <param name="dir">Request path and query, exactly as it is sent on the request line.</param>
        private string GetDigestHeader(string dir)
        {
            _nc = _nc + 1;
            // The nonce count is an 8-digit hexadecimal number; the same text must
            // be used in the hash and in the header.
            var nonceCount = _nc.ToString("x8");

            var ha1 = CalculateMd5Hash(string.Format("{0}:{1}:{2}", _user, _realm, _password));
            var ha2 = CalculateMd5Hash(string.Format("{0}:{1}", RequestMethod, dir));
            var digestResponse = CalculateMd5Hash(
                string.Format("{0}:{1}:{2}:{3}:{4}:{5}", ha1, _nonce, nonceCount, _cnonce, _qop, ha2));

            var header = new StringBuilder();
            header.AppendFormat(
                "Digest username=\"{0}\", realm=\"{1}\", nonce=\"{2}\", uri=\"{3}\", ",
                _user, _realm, _nonce, dir);
            header.AppendFormat(
                "algorithm=MD5, response=\"{0}\", qop={1}, nc={2}, cnonce=\"{3}\"",
                digestResponse, _qop, nonceCount, _cnonce);
            if (!string.IsNullOrEmpty(_opaque))
            {
                // The server's opaque value has to be sent back unchanged.
                header.AppendFormat(", opaque=\"{0}\"", _opaque);
            }
            return header.ToString();
        }

        /// <summary>
        /// Requests <c>host + dir</c> with GET and returns the response body as text.
        /// </summary>
        /// <param name="dir">Path (and optional query) appended to the host given to the constructor.</param>
        /// <returns>The complete response body.</returns>
        /// <exception cref="WebException">The request failed for a reason other than a Digest challenge, or the authenticated retry failed.</exception>
        /// <exception cref="ApplicationException">The 401 challenge lacks a required parameter (realm, nonce or qop).</exception>
        public string GrabResponse(string dir)
        {
            var uri = new Uri(_host + dir);
            // Hash the path and query the request actually carries; this also
            // covers a host prefix that itself contains a path segment.
            var digestUri = uri.PathAndQuery;

            var request = (HttpWebRequest)WebRequest.Create(uri);

            // If we've got a recent challenge, answer it pre-emptively.
            if (!string.IsNullOrEmpty(_cnonce) &&
                DateTime.UtcNow.Subtract(_cnonceDate) < ChallengeLifetime)
            {
                request.Headers.Add("Authorization", GetDigestHeader(digestUri));
            }

            HttpWebResponse response;
            try
            {
                response = (HttpWebResponse)request.GetResponse();
            }
            catch (WebException ex)
            {
                // Only a 401 can be fixed by adding an Authorization header.
                var challengeResponse = ex.Response as HttpWebResponse;
                if (challengeResponse == null || challengeResponse.StatusCode != HttpStatusCode.Unauthorized)
                {
                    throw;
                }

                string wwwAuthenticateHeader;
                using (challengeResponse)
                {
                    wwwAuthenticateHeader = challengeResponse.Headers["WWW-Authenticate"];
                }

                if (string.IsNullOrEmpty(wwwAuthenticateHeader))
                {
                    // A 401 without a challenge cannot be answered; surface the original error.
                    throw;
                }

                _realm = GrabHeaderVar("realm", wwwAuthenticateHeader);
                _nonce = GrabHeaderVar("nonce", wwwAuthenticateHeader);
                _qop = SelectQop(GrabHeaderVar("qop", wwwAuthenticateHeader));
                _opaque = FindHeaderVar("opaque", wwwAuthenticateHeader);

                // A new server nonce starts a new nonce-count sequence.
                _nc = 0;
                _cnonce = CreateClientNonce();
                _cnonceDate = DateTime.UtcNow;

                var request2 = (HttpWebRequest)WebRequest.Create(uri);
                request2.Headers.Add("Authorization", GetDigestHeader(digestUri));
                response = (HttpWebResponse)request2.GetResponse();
            }

            using (response)
            using (var stream = response.GetResponseStream())
            using (var reader = new StreamReader(stream))
            {
                return reader.ReadToEnd();
            }
        }
    }
}

现在在您的应用程序中,您可以使用以下代码:

// GrabResponse requests host + dir, so pass the scheme and authority
// (e.g. "https://mysiteurl") to the constructor and only the path and query
// (e.g. "/forum/index.php") to GrabResponse. Passing the full URL to both
// would request a doubled URL.
DigestAuthFixer digest = new DigestAuthFixer(host, username, password);
string strReturn = digest.GrabResponse(dir);
于 2012-11-21T19:39:11.797 回答
2

我目前正在观察同样的问题,尽管我正在测试的 Web 服务器是我自己的。服务器日志显示:

Digest: uri mismatch - </var/path/some.jpg> does not match request-uri
        </var/path/some.jpg?parameter=123456789>

我尝试从 URL 中删除参数(因为这似乎有所不同),但错误仍然像以前一样发生。

我的结论是 URL 参数也必须包含在摘要哈希中,并且HttpWebRequest出于某种原因将其删除。

于 2010-07-03T18:26:04.163 回答
0

在 @kitwalker 的出色工作的基础上,这是我为 DotNetCore 3 编写的委托处理程序。

    /// <summary>
    /// Respond to a Digest auth challenge and retry the request.
    /// Supports the MD5 algorithm with <c>qop=auth</c>, and the legacy form in
    /// which the server offers no <c>qop</c> at all.
    /// See <c>https://en.wikipedia.org/wiki/Digest_access_authentication</c>.
    /// </summary>
    /// <example>
    /// Example response header with challenge details:
    /// header: www-authenticate
    /// value: <c>Digest realm="Signaling Controller", charset="UTF-8", algorithm=MD5, nonce="6088c71a:a699df7b2e03c53cfe06f8d070f4345c", qop="auth"</c>
    /// </example>
    public class DigestAuthenticationHandler : DelegatingHandler
    {
        private readonly ILogger _logger;
        private readonly CredentialSettings _settings;

        public DigestAuthenticationHandler(ILogger<DigestAuthenticationHandler> logger, CredentialSettings settings)
        {
            _logger = logger;
            _settings = settings;
        }

        /// <summary>
        /// Sends the request; when the answer is a 401 carrying a Digest challenge,
        /// adds an Authorization header computed from the configured credentials
        /// and sends the request a second time.
        /// </summary>
        /// <returns>
        /// The first response when it is not a 401 or the challenge cannot be
        /// answered; otherwise the response to the authenticated retry.
        /// </returns>
        /// <exception cref="ArgumentNullException">User name or password is missing from the settings.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The Digest challenge has no realm or nonce.</exception>
        protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request,
            CancellationToken cancellationToken)
        {
            var username = _settings?.Username;
            var password = _settings?.Password;

            if (string.IsNullOrEmpty(username))
            {
                throw new ArgumentNullException(nameof(username), "Missing credentials.");
            }

            if (string.IsNullOrEmpty(password))
            {
                throw new ArgumentNullException(nameof(password), "Missing credentials.");
            }

            var response = await base.SendAsync(request, cancellationToken);

            if (response.StatusCode != HttpStatusCode.Unauthorized)
            {
                return response;
            }

            var challenges = response.Headers.WwwAuthenticate;

            _logger.LogDebug("Challenged with scheme: '{Scheme}'", challenges.FirstOrDefault()?.Scheme ?? "(unknown)");

            // A server may offer several schemes; answer the Digest one wherever it appears.
            var challenge = challenges.FirstOrDefault(
                c => string.Equals(c.Scheme, "Digest", StringComparison.OrdinalIgnoreCase));

            if (challenge == null)
            {
                _logger.LogDebug("Authentication challenge is not of type 'Digest' - give up");
                return response;
            }

            var details = new DigestAuthenticationDetails(challenge.Parameter);

            _logger.LogDebug("Authentication challenge details: {Details}", details);

            var realm = details["realm"];
            var nonce = details["nonce"];
            var opaque = details.Find("opaque");

            // The algorithm parameter is optional in a challenge; MD5 is the default.
            var algorithm = details.Find("algorithm") ?? "MD5";

            if (!string.Equals(algorithm, "MD5", StringComparison.OrdinalIgnoreCase))
            {
                _logger.LogError("Algorithm '{Algorithm}' unsupported; cannot respond to Digest auth challenge - give up", algorithm);
                return response;
            }

            // qop may be absent (legacy servers) or a list such as "auth,auth-int".
            var offeredQop = details.Find("qop");
            string? qop = null;

            if (offeredQop != null)
            {
                qop = offeredQop
                    .Split(',')
                    .Select(option => option.Trim())
                    .FirstOrDefault(option => string.Equals(option, "auth", StringComparison.OrdinalIgnoreCase));

                if (qop == null)
                {
                    _logger.LogError("Quality of protection '{Qop}' unsupported; cannot respond to Digest auth challenge - give up", offeredQop);
                    return response;
                }
            }

            var cnonce = CreateClientNonce();
            var nc = 1;

            var uri = request.RequestUri.PathAndQuery;

            var digest = BuildDigestHeader(username, password, request.Method, uri, realm, algorithm, nonce, cnonce, qop, nc, opaque);

            // The 401 response is no longer needed; release its connection before retrying.
            response.Dispose();

            // Replace rather than append, in case the request already carried an Authorization header.
            request.Headers.Remove("Authorization");
            request.Headers.Add("Authorization", digest);

            // NOTE(review): the same request message is sent again, so any request
            // content must be readable a second time - confirm for streamed bodies.
            var retry = await base.SendAsync(request, cancellationToken);

            return retry;
        }

        /// <summary>Creates a random client nonce as 32 hexadecimal characters.</summary>
        private static string CreateClientNonce()
        {
            var bytes = new byte[16];
            using var generator = RandomNumberGenerator.Create();
            generator.GetBytes(bytes);
            return ToHex(bytes);
        }

        /// <summary>Formats bytes as a lowercase hexadecimal string.</summary>
        private static string ToHex(byte[] bytes)
        {
            var builder = new StringBuilder(bytes.Length * 2);

            foreach (var b in bytes)
            {
                builder.Append(b.ToString("x2"));
            }

            return builder.ToString();
        }

        /// <summary>
        /// Computes the digest response and formats the complete Authorization
        /// header value, including the leading "Digest " scheme.
        /// </summary>
        /// <param name="qop">Quality of protection to answer with, or <c>null</c> when the server offered none.</param>
        /// <param name="opaque">Opaque value from the challenge, echoed unchanged when present.</param>
        private static string BuildDigestHeader(
            string username,
            string password,
            HttpMethod method,
            string uri,
            string realm,
            string algorithm,
            string nonce,
            string cnonce,
            string? qop,
            int nc,
            string? opaque = null)
        {
            static string CalculateMd5Hash(string input)
            {
                var bytes = Encoding.ASCII.GetBytes(input);
                using var md5 = MD5.Create();
                return ToHex(md5.ComputeHash(bytes));
            }

            // The nonce count is an 8-digit hexadecimal number; the same text is
            // used in the hash and in the header.
            var nonceCount = nc.ToString("x8");

            var ha1 = CalculateMd5Hash($"{username}:{realm}:{password}");
            var ha2 = CalculateMd5Hash($"{method.Method}:{uri}");

            // Without qop the response is computed from HA1, nonce and HA2 only.
            var digestResponse = qop == null
                ? CalculateMd5Hash($"{ha1}:{nonce}:{ha2}")
                : CalculateMd5Hash($"{ha1}:{nonce}:{nonceCount}:{cnonce}:{qop}:{ha2}");

            var header = new StringBuilder("Digest ");
            header.Append($"username=\"{username}\", ");
            header.Append($"realm=\"{realm}\", ");
            header.Append($"nonce=\"{nonce}\", ");
            header.Append($"uri=\"{uri}\", ");
            // algorithm, qop and nc are sent as bare tokens, not quoted strings.
            header.Append($"algorithm={algorithm}, ");
            header.Append($"response=\"{digestResponse}\"");

            if (qop != null)
            {
                header.Append($", qop={qop}, nc={nonceCount}, cnonce=\"{cnonce}\"");
            }

            if (!string.IsNullOrEmpty(opaque))
            {
                header.Append($", opaque=\"{opaque}\"");
            }

            return header.ToString();
        }

        /// <summary>
        /// Parsed parameters of a Digest challenge, keyed by lower-cased parameter name.
        /// </summary>
        private class DigestAuthenticationDetails
        {
            private readonly Dictionary<string, string?> _values;

            public DigestAuthenticationDetails(string? authentication)
            {
                _values = new Dictionary<string, string?>();

                if (authentication == null)
                {
                    return;
                }

                foreach (var pair in SplitOutsideQuotes(authentication))
                {
                    if (string.IsNullOrWhiteSpace(pair))
                    {
                        continue;
                    }

                    // Split on the first '=' only: values such as base64 nonces
                    // legitimately contain further '=' characters.
                    var separator = pair.IndexOf('=');

                    var key = separator < 0 ? pair : pair.Substring(0, separator);
                    var value = separator < 0 ? null : pair.Substring(separator + 1);

                    key = key
                        .Trim()
                        .Replace("\"", "")
                        .Replace("'", "")
                        .ToLowerInvariant();

                    // Only the surrounding quotes are removed; the value itself is kept intact.
                    value = value
                        ?.Trim()
                        .Trim('"');

                    // Keep the first occurrence instead of throwing on a repeated parameter.
                    _values.TryAdd(key, value);
                }
            }

            /// <summary>Gets a required parameter; throws when it is missing or has no value.</summary>
            public string this[string key] => GetValueOrThrow(key);

            /// <summary>Gets an optional parameter, or <c>null</c> when it is missing or has no value.</summary>
            public string? Find(string key)
            {
                return _values.TryGetValue(key, out var value) ? value : null;
            }

            public override string ToString()
            {
                var builder = new StringBuilder();

                foreach (var (key, value) in _values)
                {
                    builder.Append($"'{key}'='{value}' ");
                }

                return builder.ToString();
            }

            /// <summary>
            /// Splits the parameter list on commas that are not inside a quoted
            /// string, so a value such as <c>qop="auth,auth-int"</c> stays in one piece.
            /// </summary>
            private static IEnumerable<string> SplitOutsideQuotes(string text)
            {
                var current = new StringBuilder();
                var insideQuotes = false;

                foreach (var ch in text)
                {
                    if (ch == '"')
                    {
                        insideQuotes = !insideQuotes;
                    }

                    if (ch == ',' && !insideQuotes)
                    {
                        yield return current.ToString();
                        current.Clear();
                        continue;
                    }

                    current.Append(ch);
                }

                yield return current.ToString();
            }

            private string GetValueOrThrow(string key)
            {
                if (_values.TryGetValue(key, out var value))
                {
                    if (value != null)
                    {
                        return value;
                    }

                    throw new ArgumentNullException(nameof(value), $"No value for key '{key}'.");
                }

                throw new ArgumentOutOfRangeException(nameof(key), $"Key '{key}' was not found in Digest auth challenge.");
            }
        }
    }

然后,无论您在哪里注册服务,都将委托处理程序添加到HttpClient需要摘要身份验证功能的地方:

// Register the handler type itself with the service collection.
services.AddTransient<DigestAuthenticationHandler>();

// Register the typed client, then attach the handler to that client's builder.
var clientBuilder = services.AddHttpClient<ServiceThatNeedsHttpClient>();
clientBuilder.AddHttpMessageHandler<DigestAuthenticationHandler>();

注意:不支持缓存之前使用的摘要标头。

于 2021-04-28T04:18:19.850 回答