0

我需要使用 .net 套接字加载任何网页,这样我就可以自己控制连接和接收数据的方式。

笔记

  • 在这里使用 WebClient 和 HttpWebRequest 不是一个选项,我需要使用 TcpClient 和 Socket。
  • 如果我可以使用它们(HttpWebRequest)但自己控制连接和数据,我会很高兴。
  • 我真正的目标是使用我自己的 HTTP 规则使用 Socket 加载任何网页。

更新

通过运行一个 C# HTTP 代理服务器,并把 WebClient 的代理地址指向它,就可以在使用 WebClient 的同时自己控制连接和数据。

4

2 回答 2

1

RFC 中非常清楚地描述了分块传输编码。每个块的格式如下:

chunk-size[;chunk-extensions]<CRLF>
chunk-data<CRLF>

块大小首先发送,它是一个十六进制数,指定随后块数据的字节数,后面可以选择性地跟随块扩展,并以 CRLF(即 \r\n)结束。在您读取指定数量的字节后,后面还会跟着一个 CRLF,因此您必须再读取两个字节。

然后你就可以开始读取下一个块了。如果 chunk-size 为 0,则还需要再读取两个 CRLF(不要将其添加到缓冲区),此时您已收到所有数据,因为大小为 0 的块表示最后一个块。

请注意,您不能使用 ReadLine() 读取块数据,因为响应正文(即 chunk-data)中的换行符会被当作行结束符,因此它可能会在读取完整个块之前返回。

于 2012-07-26T12:52:55.173 回答
0

这是使用 C# 加载任何网页的方法,包括分块网页,它适用于 .Net 中可用的所有类型的流

此代码基于 RFC 2616-Section-3.6

/// <summary>
/// Read-only stream over an HTTP response body carried by an inner stream.
/// When <see cref="Chunked"/> is set, the body is decoded from the chunked
/// transfer coding (chunk-size line, chunk data, CRLF, ... terminated by a
/// zero-sized chunk and an optional trailer section). Otherwise reads are
/// passed straight through to the inner stream.
/// </summary>
public class HttpStream : Stream
{
    internal readonly Stream InnerStream;
    private long length;
    private bool canRead;

    /// <summary>True when the body uses the chunked transfer coding.</summary>
    internal bool Chunked { get; set; }

    /// <summary>Size of the chunk currently being read, or -1 when the next chunk header has not been read yet.</summary>
    internal int ChunkLength { get; set; }

    /// <summary>Number of bytes of the current chunk that have already been delivered.</summary>
    internal int ChunkReceivedPosition { get; set; }

    /// <summary>Wraps <paramref name="innerStream"/>, positioned at the start of the response body.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="innerStream"/> is null.</exception>
    internal HttpStream(Stream innerStream)
    {
        if (innerStream == null)
            throw new ArgumentNullException("innerStream");

        InnerStream = innerStream;
        ChunkLength = -1;
        canRead = true;
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> body bytes into <paramref name="buffer"/>.
    /// In chunked mode the call keeps reading across chunk boundaries until the
    /// buffer is full or the terminating zero-sized chunk is reached.
    /// </summary>
    /// <returns>The number of bytes read; 0 once the end of the chunked body has been reached.</returns>
    /// <exception cref="EndOfStreamException">The inner stream ended in the middle of a chunked body.</exception>
    /// <exception cref="InvalidDataException">A chunk-size line is empty or encodes a negative size.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
            throw new ArgumentNullException("buffer");
        if (offset < 0)
            throw new ArgumentOutOfRangeException("offset");
        if (count < 0)
            throw new ArgumentOutOfRangeException("count");
        if (buffer.Length - offset < count)
            throw new ArgumentException("offset and count exceed the buffer length.");

        // The Stream contract signals end of stream with 0, never a negative value.
        if (!canRead)
            return 0;

        if (!Chunked)
        {
            var countRead = InnerStream.Read(buffer, offset, count);
            Position += countRead;
            return countRead;
        }

        var bytesReadInCallSession = 0;

        while (true)
        {
            if (ChunkLength == -1)
            {
                int nextChunkLength;
                if (!TryReadChunkSize(out nextChunkLength))
                {
                    // Inner stream ended before the terminating chunk. Hand back what this
                    // call already collected; the next call reports the truncation.
                    if (bytesReadInCallSession > 0)
                        return bytesReadInCallSession;

                    throw new EndOfStreamException("Unexpected end of stream while reading a chunk size.");
                }

                ChunkLength = nextChunkLength;
            }

            // end of HTTP response-body
            if (ChunkLength == 0)
            {
                canRead = false;
                break;
            }

            // Never read past the current chunk nor past the caller's buffer.
            int wanted = Math.Min(ChunkLength - ChunkReceivedPosition, count - bytesReadInCallSession);

            // read chunked part
            while (wanted > 0)
            {
                var bytesRead = InnerStream.Read(buffer, offset + bytesReadInCallSession, wanted);
                if (bytesRead <= 0)
                {
                    // A zero-length read means the connection closed mid-chunk; without this
                    // check the loop would spin forever.
                    if (bytesReadInCallSession > 0)
                        return bytesReadInCallSession;

                    throw new EndOfStreamException("Unexpected end of stream while reading chunk data.");
                }

                wanted -= bytesRead;
                ChunkReceivedPosition += bytesRead;
                bytesReadInCallSession += bytesRead;
                Position += bytesRead;
            }

            if (ChunkReceivedPosition == ChunkLength)
            {
                // force to read next chunk size in next loop
                ChunkLength = -1;
                ChunkReceivedPosition = 0;

                // discard the CR LF that terminates the chunk data
                InnerStream.ReadLine();
            }

            if (bytesReadInCallSession == count)
                break;
        }

        if (!canRead)
        {
            // Consume the trailer section: header lines up to and including the blank line.
            while (true)
            {
                string trailer = InnerStream.ReadLine();
                if (String.IsNullOrWhiteSpace(trailer))
                    break;

                // TODO: process trailers
            }
        }

        return bytesReadInCallSession;
    }

    /// <summary>
    /// Reads one "chunk-size[;chunk-extensions]CRLF" line and parses the hexadecimal size.
    /// </summary>
    /// <returns>False when the inner stream is already at its end; true otherwise.</returns>
    private bool TryReadChunkSize(out int size)
    {
        // Read the line with its terminator so that an empty result can only mean end of stream.
        string line = InnerStream.ReadLine();
        if (line.Length == 0)
        {
            size = -1;
            return false;
        }

        // Chunk extensions (everything from ';') are allowed by the RFC and ignored here.
        int extensionStart = line.IndexOf(';');
        string token = (extensionStart >= 0 ? line.Substring(0, extensionStart) : line).Trim();
        if (token.Length == 0)
            throw new InvalidDataException("Missing chunk size in chunked HTTP body.");

        size = Convert.ToInt32(token, 16);

        // Hex input such as FFFFFFFF parses to a negative number, which would collide
        // with the -1 "header not read yet" sentinel.
        if (size < 0)
            throw new InvalidDataException("Invalid chunk size in chunked HTTP body.");

        return true;
    }

    /// <summary>Records the body length reported by the caller; it does not affect reading.</summary>
    public override void SetLength(long value)
    {
        length = value;
    }

    /// <summary>True until the end of a chunked body has been reached.</summary>
    public override bool CanRead
    {
        get { return canRead; }
    }

    /// <summary>The value last passed to <see cref="SetLength"/>.</summary>
    public override long Length
    {
        get { return length; }
    }

    /// <summary>Number of body bytes delivered so far (unless overwritten by the caller).</summary>
    public override long Position
    {
        get;
        set;
    }

    /// <summary>No-op: the stream is read-only, so there is nothing to flush.</summary>
    public override void Flush()
    {
    }

    /// <summary>Not supported; the stream is forward-only.</summary>
    public override long Seek(long offset, SeekOrigin origin)
    {
        throw new NotSupportedException();
    }

    /// <summary>Not supported; the stream is read-only.</summary>
    public override void Write(byte[] buffer, int offset, int count)
    {
        throw new NotSupportedException();
    }

    /// <summary>Always false; the stream is forward-only.</summary>
    public override bool CanSeek
    {
        get { return false; }
    }

    /// <summary>Always false; the stream is read-only.</summary>
    public override bool CanWrite
    {
        get { return false; }
    }
}

我们需要一个高级的 ReadLine 作为 Stream 类的扩展方法

/// <summary>
/// Stream helpers for reading line-oriented protocol data one byte at a time.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Reads bytes from <paramref name="stream"/> until the line terminator has been
    /// read or the stream ends. Each byte is mapped to the char with the same code.
    /// </summary>
    /// <param name="stream">Stream to read from.</param>
    /// <param name="returnLineEndBytes">
    /// When true the terminator is kept in the result; when false it is stripped.
    /// </param>
    /// <param name="lineEndBytes">Terminator byte sequence; defaults to CR LF.</param>
    /// <returns>
    /// The line read. If the stream ends before a terminator is seen, the bytes read so
    /// far are returned unchanged (an empty string when the stream was already at its end).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="lineEndBytes"/> is empty.</exception>
    public static string ReadLine(this Stream stream, bool returnLineEndBytes = true, byte[] lineEndBytes = null)
    {
        if (stream == null)
            throw new ArgumentNullException("stream");

        // default end line bytes
        if (lineEndBytes == null)
            lineEndBytes = new byte[2] { 13, 10 };

        if (lineEndBytes.Length == 0)
            throw new ArgumentException("The line terminator must contain at least one byte.", "lineEndBytes");

        var stringBuilder = new StringBuilder();
        var terminatorFound = false;

        while (true)
        {
            var byteRead = stream.ReadByte();

            // end of stream break loop
            if (byteRead == -1)
                break;

            stringBuilder.Append((char)byteRead);

            if (EndsWith(stringBuilder, lineEndBytes))
            {
                terminatorFound = true;
                break;
            }
        }

        // Only strip the terminator when one was actually read; a line cut short by
        // end of stream must not lose its trailing data bytes.
        if (!returnLineEndBytes && terminatorFound)
            stringBuilder.Length -= lineEndBytes.Length;

        return stringBuilder.ToString();
    }

    /// <summary>Checks whether the tail of <paramref name="text"/> equals <paramref name="suffix"/> byte for byte.</summary>
    private static bool EndsWith(StringBuilder text, byte[] suffix)
    {
        if (text.Length < suffix.Length)
            return false;

        var start = text.Length - suffix.Length;
        for (var i = 0; i < suffix.Length; i++)
        {
            if (text[start + i] != (char)suffix[i])
                return false;
        }

        return true;
    }
}
于 2012-07-26T20:19:13.733 回答