-4

我正在尝试用 Python 脚本从 libpcap 文件重建网页。我已经捕获了所有的数据包,所以目标是:以一个 libpcap 文件作为输入,从中找出所有必要的数据包,最后输出一个网页文件,其中包含该页面的所有图片和数据。谁能为我指出正确的方向?我想我需要用到 dpkt 和/或 Scapy。

更新 1:代码如下!这是我目前用 Python 写出的代码。它的目的是获取单个 HTTP 会话的第一组数据包:该会话以 SYN 和 ACK 标志都置为 1 的数据包开始,以 FIN 标志置为 1 的数据包结束。

假设在数据包捕获期间只访问了一个网站,此代码是否会附加重建访问网页所需的所有必要数据包?

假设我拥有所有必要的数据包,我该如何重建网页?

# Collect the server-to-client packets of the first HTTP session found in
# 'myCapture.pcap': from the server's SYN-ACK up to and including the first
# packet of that session that carries the FIN flag.
from scapy.all import IP, TCP, rdpcap

pktList = list()                 #create a list to store the packets we want to keep
pcap = rdpcap('myCapture.pcap')  #returns a packet list with every packet in the pcap
count = None                     #index of the SYN-ACK packet in pcap (None until one is found)
for index, pkt in enumerate(pcap):
    #a capture also holds non-TCP traffic, so check the layers before indexing them
    if pkt.haslayer(IP) and pkt.haslayer(TCP) and pkt[TCP].flags == 0x12 and pkt[TCP].sport == 80:
        count = index            #SYN-ACK sent from port 80: an HTTP session has been initiated
        break

if count is not None:
    synAck = pcap[count]
    #walk forward from the SYN-ACK; the range bound stops us at the end of the
    #capture even when the session's FIN was never recorded
    for currentPkt in range(count, len(pcap)):
        pkt = pcap[currentPkt]
        if not (pkt.haslayer(IP) and pkt.haslayer(TCP)):
            continue
        #same source/destination address and port as the SYN-ACK means the packet
        #belongs to the same server-to-client stream, so we keep it
        sameSession = (pkt[TCP].sport == 80
                       and pkt[TCP].dport == synAck[TCP].dport
                       and pkt[IP].src == synAck[IP].src
                       and pkt[IP].dst == synAck[IP].dst)
        if not sameSession:
            continue
        pktList.append(pkt)
        #a FIN segment may still carry data, so it is stored before we stop
        if (int(pkt[TCP].flags) & 0x01) == 0x01:
            break
#pktList now holds the session's packets; the page still has to be rebuilt from their payloads
4

2 回答 2

0

也许类似 `tcpick -r your.pcap -wRS` 这样的命令可以为您完成这项工作。

于 2014-01-22T18:29:57.300 回答
0

此 Python 脚本将提取 PCAP 文件中所有未加密的 HTTP 网页,并将它们输出为 HTML 文件。它使用 Scapy 处理各个数据包(另一个不错的 Python 模块是 dpkt)。

from scapy.all import *
from operator import *
import sys



def sorting(pcap):
    """Keep only server-side HTTP packets and group them by TCP stream.

    A packet is kept when it has both an IP and a TCP layer, its TCP source
    port is 80 and its TCP flags are not exactly RST+ACK (0x14).  The kept
    packets are ordered by (IP source address, TCP destination port).  The
    sort is stable, so packets with the same key stay in the order in which
    they appear in ``pcap``.

    Side effect: the resulting list is also written to ``sorted.pcap`` in
    the current working directory.

    :param pcap: iterable of scapy packets (e.g. the result of ``rdpcap``).
    :return: the filtered and ordered list of packets.
    """
    rst_ack = 0x14  # RST (0x04) + ACK (0x10)
    newerList = [
        pkt for pkt in pcap
        # The sort key below reads "IP.src", which raises IndexError for a
        # packet without an IP layer, so such packets are filtered out here.
        if pkt.haslayer(IP)
        and pkt.haslayer(TCP)
        and pkt[TCP].sport == 80
        # Compare the flags as a number instead of via their bin() string.
        and int(pkt[TCP].flags) != rst_ack
    ]
    newerList.sort(key=itemgetter("IP.src", "TCP.dport"))
    wrpcap("sorted.pcap", newerList)
    return newerList


def _tcp_payload(pkt):
    """Return the raw TCP payload of *pkt* as a byte string."""
    # bytes() is an alias of str() on Python 2 and yields the raw bytes on
    # Python 3, so the same call works on both.
    return bytes(pkt[TCP].payload)


def extract(pcap, num, count):
    """Write every HTML page found in *pcap* to ``file<N>.html``.

    Starting at index ``count``, the packets are scanned for a payload that
    contains ``<html``.  That payload and the payloads of the following
    packets are written to ``file<num>.html`` up to and including the first
    payload that contains ``</html>``.  The scan then continues after that
    packet with the next file number, until the packet list is exhausted.

    The payloads are written exactly as captured: no HTTP decoding is done,
    so anything that precedes ``<html`` in the first packet (such as
    response headers) ends up in the file as well.

    NOTE: files are opened in append mode, so output left over from an
    earlier run should be removed before running again.

    :param pcap: indexable sequence of scapy packets that all have a TCP
        layer.
    :param num: number used in the name of the first output file.
    :param count: index in ``pcap`` at which to start scanning.
    :return: ``None``.  The function returns normally once all packets have
        been consumed, so the caller can carry on afterwards.
    """
    total = len(pcap)
    index = count
    # Iterating instead of recursing once per page keeps captures with many
    # pages from hitting the interpreter's recursion limit.
    while index < total:
        # Skip ahead to the packet that carries the start of a page.
        while index < total and b"<html" not in _tcp_payload(pcap[index]):
            index += 1
        if index >= total:
            return  # no further page: do not create an empty output file

        # The file is opened once per page and closed by the with-statement.
        with open("file" + str(num) + ".html", "ab") as page:
            while index < total:
                data = _tcp_payload(pcap[index])
                page.write(data)
                index += 1
                # Testing the packet that was just written also ends pages
                # whose opening and closing tags share a single packet.
                if b"</html>" in data:
                    break
        num += 1


if __name__ == "__main__":
    # raw_input() only exists on Python 2; on Python 3 the same function is
    # called input().
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    #Read in file from user
    f = read_line("Please enter the name of your pcap file in this directory.  Example: myFile.pcap")
    pcapFile = rdpcap(f)
    # print("...") with a single string behaves the same on Python 2 and 3.
    print("Filtering Pcap File of non HTTP Packets and then sorting packets")
    #Sort and Filter the PCAP
    pcapFile = sorting(pcapFile)
    print("Sorting Complete")
    print("Extracting Data")
    #Extract the Data
    extract(pcapFile, 1, 0)
    print("Extracting Complete")
于 2014-04-26T01:03:41.367 回答