0

我正在使用代理登录到一个安全站点,并希望能够将所有文件和文件夹下载到我的本地磁盘上。这就是我到目前为止所拥有的。

编辑-**目前下面的代码将从给定的根目录开始并下载所有子目录中的所有文件......非常酷:)但它不会复制我需要的文件夹结构。请问有什么帮助吗?**编辑

首先我得到 4 个参数(所以可以在 Linux 的 cmd 行上使用)

1) 我要下载的目录的 url 2) 安全登录的用户名 3) psw​​ 4) 我希望将文件保存在本地磁盘上的目录

       public class ApacheUrl4
{
// this is the entry point for what I want the instase of the class to do
    public static void main(String args[]) throws Exception {

        String url  = args[0];
        final String username  = args[1];
        final String password1  = args[2];
        String directory  = args[3];

        checkArguments(args);

        ApacheUrl4 max = new ApacheUrl4();
        max.process(url, username, password1, directory);

    }
    public void process (String url, String username1, String password1, String directory) throws Exception {

        final char[] password  = password1.toCharArray();   
        final String username = username1;
         Authenticator.setDefault(new Authenticator(){
              protected  PasswordAuthentication  getPasswordAuthentication(){
               PasswordAuthentication p=new PasswordAuthentication(username , password);
               return p;
              }
             });


        BufferedInputStream in = null;
        BufferedInputStream in2 = null;
        FileOutputStream fout = null;
    // proxy 
        String proxyip = "000.000.000" ;
        int proxyport = 8080;
        Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyip, proxyport));
     // URL connection to file 
        URL file = new URL(url);
        URLConnection connection = file.openConnection(proxy);      
        ((HttpURLConnection)connection).getResponseCode();
        int reponsecode = ((HttpURLConnection)connection).getResponseCode();
        System.out.println("response code " + reponsecode);


        if (reponsecode == HttpURLConnection.HTTP_FORBIDDEN){
            System.out.println("Invalid username or psw");
            return;
        }
        if (reponsecode != HttpURLConnection.HTTP_OK){
            System.out.println("Unable to find response");
            return;
        }





        //Save the file into the chosen folder
        in = new BufferedInputStream(connection.getInputStream());

        //Create instance of DocumentBuilderFactory
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        //Get the DocumentBuilder
        DocumentBuilder docBuilder = factory.newDocumentBuilder();
        //Using existing XML Document
        Document doc = docBuilder.parse(in);

        //create the root element 
        Element root = doc.getDocumentElement();
        NodeList nodeList = root.getElementsByTagName("li"); 


        for(int i=0; i<nodeList.getLength(); i++){
          Node childNode = nodeList.item(i);
          if (childNode.getTextContent().contains("/")) {


            //  System.out.println(url + childNode.getTextContent());
                process(url + childNode.getTextContent(), username, password1, directory);                        

        }

    if (childNode.getTextContent().contains(".") && !childNode.getTextContent().contains("..")) {


            String textcon =  url + childNode.getTextContent();
            System.out.println("aaa " + textcon);

            if (url.endsWith("/")) {
                System.out.println("ends with a /");    
            }

            textcon = textcon.replace( " ", "%20");
            URL file2 = new URL(textcon);

            String[] urlparts = textcon.split("/");
            int urllength = urlparts.length;
            String lastarray = urlparts[urllength-2];
            System.out.println("last array " + lastarray);


            URLConnection connection2 = file2.openConnection(proxy);        
            in2 = new BufferedInputStream(connection2.getInputStream());
            String test2 = childNode.getTextContent();
            System.out.println("eeee " + childNode.getTextContent());

            String filename = (directory + test2 );
              File f=new File(filename);
                  if(f.isDirectory())
                  continue;





              //InputStream inputStream= new FileInputStream("InputStreamToFile.java");
              OutputStream out=new FileOutputStream(f);
              byte buf[]=new byte[1024];
              int len;
              while((len=in2.read(buf))>0)
              out.write(buf,0,len);
              out.close();
              in2.close();


        }
    }
}




    // this is part of the validation of arguments provided by user
    private static void checkArguments(String[] args) {
        while (args.length < 4 || args[0].isEmpty() || args.length > 4 ) {
                System.out.println("Please specify five arguments in the following format \n "  +
                " URL USERNAME PASWORD FILEPATH FILENAME " +
                "EG: \"java helloW http://www.google.com user_name password C:\\path/dir/ filename.exe\" ");
                System.exit(1);
         }
    }
}
4

1 回答 1

0

为了下载目录中的文件,您首先需要目录列表。如果允许,这将由服务器自动生成。首先,使用您的浏览器检查此特定服务器上是否存在这种情况。

然后您需要解析列表页面,并下载每个 url。坏消息是这些页面没有标准。好消息是大多数互联网都托管在 apache 或 IIS 上,所以如果你能管理这两个,你就有了很好的部分。

您可能只需将文件解析为 xml (xhtml) 并使用 xpath 来恢复所有 url。

于 2012-07-17T12:25:52.923 回答