0

这是代码:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
using unfreez_wrapper;
using Shell32;


namespace DownloadImages
{
    public partial class Form1 : Form
    {
        string f;
        string UrlsPath;
        int counter;
        UnFreezWrapper uf;
        string localFilename;

        public Form1()
        {
            InitializeComponent();

            uf = new UnFreezWrapper();
            counter = 0;
            localFilename = @"d:\localpath\";
            UrlsPath = @"d:\localpath\Urls\";

                using (WebClient client = new WebClient())
                {
                    client.DownloadFile("http://www.sat24.com/foreloop.aspx?type=1&continent=europa#",localFilename + "test.html");
                    client.DownloadFile("http://www.sat24.com/en/eu?ir=true", localFilename + "test1.html");
                }

                f = File.ReadAllText(localFilename + "test1.html");
                test("image2.ashx", "ir=true");
        }

        private void test(string firstTag, string lastTag)
        {
            List<string> imagesUrls = new List<string>();
            int startIndex = 0;
            int endIndex = 0;
            int position = 0;

            string startTag = firstTag;//"http://www.niederschlagsradar.de/images.aspx";
            string endTag = lastTag;//"cultuur=en-GB&continent=europa";

            startIndex = f.IndexOf(startTag);

            while (startIndex > 0)
            {

                endIndex = f.IndexOf(endTag,startIndex);
                if (endIndex == -1)
                {
                    break;
                }
                string t = f.Substring(startIndex, endIndex - startIndex + endTag.Length);
                imagesUrls.Add(t);
                position = endIndex + endTag.Length;
                startIndex = f.IndexOf(startTag,position);
            }
            string item = imagesUrls[imagesUrls.Count - 1];
            imagesUrls.Remove(item);
            for (int i = 0; i < imagesUrls.Count; i++)
            {
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(imagesUrls[i], UrlsPath + "Image" + counter.ToString("D6"));
                }
                counter++;
            }
            List<string> files = Directory.GetFiles(UrlsPath).ToList();
            uf.MakeGIF(files, localFilename + "weather", 80, true);
        }

首先,我将此 html 下载为 html 文件:

http://www.sat24.com/en/eu?ir=true

有 9 种不同图像/GIF 的动画。我想下载每个 gif 网址。所以在硬盘上我会得到 9 个 gif。

在阅读内容中的文件http://www.sat24.com/en/eu?ir=true时,我看到:

var imageUrls = ["/image2.ashx?region=eu&time=201309162345&ir=true","/image2.ashx?region=eu&time=201309162330&ir=true","/image2.ashx?region=eu&time=201309162315&ir=true","/image2.ashx?region=eu&time=201309162300&ir=true","/image2.ashx?region=eu&time=201309162245&ir=true","/image2.ashx?region=eu&time=201309162230&ir=true","/image2.ashx?region=eu&time=201309162215&ir=true","/image2.ashx?region=eu&time=201309162200&ir=true","/image2.ashx?region=eu&time=201309162145&ir=true"];

在列表中: imagesUrls 我看到这 9 个网址:

例如,这是在索引 0 中: image2.ashx?region=eu&time=201309162345&ir=true 我试过没有 image2.ashx?但在这两种情况下,我都会遇到错误:

client.DownloadFile(imagesUrls[i], UrlsPath + "Image" + counter.ToString("D6"));

ArgumentException 路径中有非法字符

在此之前,当我使用 test.html 和其他两个标签开始和结束时,它可以正常工作。

但现在我使用 test1.html 和这两个标签: test("image2.ashx", "ir=true"); 但得到了例外。

例如,当我拍摄一个图片网址时: image2.ashx?region=eu&time=201309170015&ir=true 并试图在 chrome 中浏览它,但我没有得到任何结果,它试图在 google 中搜索它。

它甚至不是 url 。

这是完整的异常错误:

System.ArgumentException was unhandled
  HResult=-2147024809
  Message=Illegal characters in path.
  Source=mscorlib
  StackTrace:
       at System.IO.Path.CheckInvalidPathChars(String path, Boolean checkAdditional)
       at System.Security.Permissions.FileIOPermission.CheckIllegalCharacters(String[] str)
       at System.Security.Permissions.FileIOPermission.AddPathList(FileIOPermissionAccess access, AccessControlActions control, String[] pathListOrig, Boolean checkForDuplicates, Boolean needFullPath, Boolean copyPathList)
       at System.Security.Permissions.FileIOPermission..ctor(FileIOPermissionAccess access, String[] pathList, Boolean checkForDuplicates, Boolean needFullPath)
       at System.IO.Path.GetFullPath(String path)
       at System.Net.WebClient.GetUri(String path)
       at System.Net.WebClient.DownloadFile(String address, String fileName)
       at DownloadImages.Form1.test(String firstTag, String lastTag) in d:\C-Sharp\DownloadImages\DownloadImages\DownloadImages\Form1.cs:line 79
       at DownloadImages.Form1..ctor() in d:\C-Sharp\DownloadImages\DownloadImages\DownloadImages\Form1.cs:line 45
       at DownloadImages.Program.Main() in d:\C-Sharp\DownloadImages\DownloadImages\DownloadImages\Program.cs:line 19
       at System.AppDomain._nExecuteAssembly(RuntimeAssembly assembly, String[] args)
       at System.AppDomain.ExecuteAssembly(String assemblyFile, Evidence assemblySecurity, String[] args)
       at Microsoft.VisualStudio.HostingProcess.HostProc.RunUsersAssembly()
       at System.Threading.ThreadHelper.ThreadStart_Context(Object state)
       at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext, ContextCallback callback, Object state, Boolean preserveSyncCtx)
       at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state, Boolean preserveSyncCtx)
       at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state)
       at System.Threading.ThreadHelper.ThreadStart()
  InnerException: 

那我怎么能从这个网址一张一张地下载图片呢?http://www.sat24.com/en/eu?ir=true

当我使用 test.html 和 startTag "" http://www.niederschlagsradar.de/images.aspx ": 和 endTag: ""cultuur=en-GB&continent=europa"" 时效果很好!

但是现在使用 test1.html 和两个不同的标签它不起作用。

4

1 回答 1

1

在异常点上,究竟什么imagesUrls[i]

你是说它像image2.ashx?region=eu&time=201309162345&ir=true

如果是这样,您需要在其前面添加协议和服务器,即在前面http://www.sat24.com/添加一个 URIhttp://www.sat24.com/image2.ashx?region=eu&time=201309162345&ir=true

但是,另一个问题是,您正在搜索image2.ashx开始标签,然后ir=true作为结束标签。查看该页面的来源,有许多以 .结尾的image2.ashxURI 。ir=true

例如http://www.sat24.com/image2.ashx?button=af260x160

当您在该 URI 中找到开始标记时,您将在找到结束标记之前获得大量 HTML。

于 2013-09-17T04:18:37.010 回答