Commit b0950589 authored by Bertrone Matteo's avatar Bertrone Matteo

http filter example

parent 3ffffa7c
# Simple HTTP Filter: Project purpose
Write an eBPF application that parses HTTP packets and extracts (and prints on screen) the URL contained in the GET/POST request.
[https://github.com/netgroup-polito/ebpf-test](https://github.com/netgroup-polito/ebpf-test)
# Usage Example
```Shell
$ sudo python http-parse-v2.py
GET /pipermail/iovisor-dev/ HTTP/1.1
HTTP/1.1 200 OK
GET /favicon.ico HTTP/1.1
HTTP/1.1 404 Not Found
GET /pipermail/iovisor-dev/2016-January/thread.html HTTP/1.1
HTTP/1.1 200 OK
GET /pipermail/iovisor-dev/2016-January/000046.html HTTP/1.1
HTTP/1.1 200 OK
```
# Implementation using BCC
eBPF socket filter.<br />
Filters IP and TCP packets containing "HTTP", "GET" or "POST" in the payload, plus all subsequent packets belonging to the same session, i.e. having the same (ip_src,ip_dst,port_src,port_dst).<br />
The program is loaded as PROG_TYPE_SOCKET_FILTER and attached to a socket bound to eth0. <br />
Matching packets are forwarded to user space, others dropped by the filter.<br />
<br />
Python script reads filtered raw packets from the socket, if necessary reassembles packets belonging to the same session, and prints on stdout the first line of the HTTP GET/POST request. <br />
# Usage
Requires:
- BPF Compiler Collection [BCC](https://github.com/iovisor/bcc)
- Follow [INSTALL](https://github.com/iovisor/bcc/blob/master/INSTALL.md) guide
# To run:
```Shell
$ sudo python http-parse.py
$ sudo python http-parse-v2.py
```
\ No newline at end of file
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>
//IP protocol number for TCP; compared against ip->nextp to select TCP packets
#define IP_TCP 6
//Ethernet header length in bytes; first term of the payload offset computation
#define ETH_HLEN 14
//Key of the "sessions" map.
//Filled by http_filter from the IP addresses and TCP ports of the current packet,
//so it identifies one direction of a TCP connection.
struct Key {
u32 src_ip; //source ip
u32 dst_ip; //destination ip
unsigned short src_port; //source port
unsigned short dst_port; //destination port
};
//Value stored in the "sessions" map for every tracked session.
//NOTE(review): the field is documented as nanoseconds but is a plain int, and
//http_filter in this file only ever stores 0 in it - confirm how user space uses it.
struct Leaf {
int timestamp; //timestamp in ns
};
//BPF_TABLE(map_type, key_type, leaf_type, table_name, num_entry)
//hash map <Key, Leaf> named "sessions", declared with 1024 entries
//tracks sessions having the same Key (src_ip, dst_ip, src_port, dst_port);
//http_filter inserts an entry on every HTTP match and looks it up for all other packets
BPF_TABLE("hash", struct Key, struct Leaf, sessions, 1024);
/*eBPF program.
  Keeps IP/TCP packets whose payload is at least 7 bytes long and starts with
  "HTTP" (a response) or with one of the request methods
  "GET", "POST", "PUT", "DELETE", "HEAD",
  AND ALL the other packets having the same (src_ip,dst_ip,src_port,dst_port),
  i.e. belonging to the same "session".
  The session check avoids url truncation when the url is too long:
  the userspace script, if necessary, reassembles urls split across 2 or more packets.

  The IP header length is taken from the packet (ip->hlen), so the TCP header
  is located correctly even when the IP header carries options.

  if the program is loaded as PROG_TYPE_SOCKET_FILTER
  and attached to a socket
  return  0 -> DROP the packet
  return -1 -> KEEP the packet and return it to user space (userspace can read it from the socket_fd )
*/
int http_filter(struct __sk_buff *skb) {
    u8 *cursor = 0;

    struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet));
    //filter IP packets (ethernet type = 0x0800)
    if (!(ethernet->type == 0x0800)) {
        goto DROP;
    }

    struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
    //filter TCP packets (ip next protocol = 0x06)
    if (ip->nextp != IP_TCP) {
        goto DROP;
    }

    u32 tcp_header_length = 0;
    u32 ip_header_length = 0;
    u32 payload_offset = 0;
    u32 payload_length = 0;
    struct Key key;
    struct Leaf leaf;

    //calculate ip header length
    //value to multiply * 4
    //e.g. ip->hlen = 5 ; IP Header Length = 5 x 4 byte = 20 byte
    ip_header_length = ip->hlen << 2;    //SHL 2 -> *4 multiply

    //a header shorter than the fixed part described by struct ip_t is malformed
    if (ip_header_length < sizeof(*ip)) {
        goto DROP;
    }

    //skip the IP options (if any) so that the TCP header is parsed at its real
    //position instead of always 20 bytes after the start of the IP header
    void *_ = cursor_advance(cursor, (ip_header_length - sizeof(*ip)));

    struct tcp_t *tcp = cursor_advance(cursor, sizeof(*tcp));

    //retrieve ip src/dest and port src/dest of current packet
    //and save it into struct Key
    key.dst_ip = ip->dst;
    key.src_ip = ip->src;
    key.dst_port = tcp->dst_port;
    key.src_port = tcp->src_port;

    //calculate tcp header length
    //value to multiply *4
    //e.g. tcp->offset = 5 ; TCP Header Length = 5 x 4 byte = 20 byte
    tcp_header_length = tcp->offset << 2; //SHL 2 -> *4 multiply

    //calculate payload offset and length
    payload_offset = ETH_HLEN + ip_header_length + tcp_header_length;
    payload_length = ip->tlen - ip_header_length - tcp_header_length;

    //http://stackoverflow.com/questions/25047905/http-request-minimum-size-in-bytes
    //minimum length of http request is always greater than 7 bytes
    //avoid invalid access memory
    //include empty payload
    if (payload_length < 7) {
        goto DROP;
    }

    //load first 7 bytes of payload into payload_array
    //direct access to skb not allowed
    //the loop has a constant trip count (0..6); only the load offset is dynamic
    unsigned long payload_array[7];
    int i = 0;
    for (i = 0; i < 7; i++) {
        payload_array[i] = load_byte(skb, payload_offset + i);
    }

    //find a match with an HTTP message
    //HTTP
    if ((payload_array[0] == 'H') && (payload_array[1] == 'T') && (payload_array[2] == 'T') && (payload_array[3] == 'P')) {
        goto HTTP_MATCH;
    }
    //GET
    if ((payload_array[0] == 'G') && (payload_array[1] == 'E') && (payload_array[2] == 'T')) {
        goto HTTP_MATCH;
    }
    //POST
    if ((payload_array[0] == 'P') && (payload_array[1] == 'O') && (payload_array[2] == 'S') && (payload_array[3] == 'T')) {
        goto HTTP_MATCH;
    }
    //PUT
    if ((payload_array[0] == 'P') && (payload_array[1] == 'U') && (payload_array[2] == 'T')) {
        goto HTTP_MATCH;
    }
    //DELETE
    if ((payload_array[0] == 'D') && (payload_array[1] == 'E') && (payload_array[2] == 'L') && (payload_array[3] == 'E') && (payload_array[4] == 'T') && (payload_array[5] == 'E')) {
        goto HTTP_MATCH;
    }
    //HEAD
    if ((payload_array[0] == 'H') && (payload_array[1] == 'E') && (payload_array[2] == 'A') && (payload_array[3] == 'D')) {
        goto HTTP_MATCH;
    }

    //no HTTP match
    //check if packet belongs to an HTTP session
    struct Leaf *lookup_leaf = sessions.lookup(&key);
    if (lookup_leaf) {
        //send packet to userspace
        goto KEEP;
    }
    goto DROP;

    //HTTP message found: remember the session, then keep the packet
    HTTP_MATCH:
    //if not already present, insert into map <Key, Leaf>
    //the update that follows stores timestamp 0 again on every match
    leaf.timestamp = 0;
    sessions.lookup_or_init(&key, &leaf);
    sessions.update(&key, &leaf);

    //send packet to userspace returning -1
    KEEP:
    return -1;

    //drop the packet returning 0
    DROP:
    return 0;
}
This diff is collapsed.
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>
//IP protocol number for TCP; compared against ip->nextp to select TCP packets
#define IP_TCP 6
//Ethernet header length in bytes; first term of the payload offset computation
#define ETH_HLEN 14
/*eBPF program.
  Keeps IP/TCP packets whose payload is at least 7 bytes long and starts with
  "HTTP" (a response) or with one of the request methods
  "GET", "POST", "PUT", "DELETE", "HEAD".

  The IP header length is taken from the packet (ip->hlen), so the TCP header
  is located correctly even when the IP header carries options.

  if the program is loaded as PROG_TYPE_SOCKET_FILTER
  and attached to a socket
  return  0 -> DROP the packet
  return -1 -> KEEP the packet and return it to user space (userspace can read it from the socket_fd )
*/
int http_filter(struct __sk_buff *skb) {
    u8 *cursor = 0;

    struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet));
    //filter IP packets (ethernet type = 0x0800)
    if (!(ethernet->type == 0x0800)) {
        goto DROP;
    }

    struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));
    //filter TCP packets (ip next protocol = 0x06)
    if (ip->nextp != IP_TCP) {
        goto DROP;
    }

    u32 tcp_header_length = 0;
    u32 ip_header_length = 0;
    u32 payload_offset = 0;
    u32 payload_length = 0;

    //calculate ip header length
    //value to multiply * 4
    //e.g. ip->hlen = 5 ; IP Header Length = 5 x 4 byte = 20 byte
    ip_header_length = ip->hlen << 2;    //SHL 2 -> *4 multiply

    //a header shorter than the fixed part described by struct ip_t is malformed
    if (ip_header_length < sizeof(*ip)) {
        goto DROP;
    }

    //skip the IP options (if any) so that the TCP header is parsed at its real
    //position instead of always 20 bytes after the start of the IP header
    void *_ = cursor_advance(cursor, (ip_header_length - sizeof(*ip)));

    struct tcp_t *tcp = cursor_advance(cursor, sizeof(*tcp));

    //calculate tcp header length
    //value to multiply *4
    //e.g. tcp->offset = 5 ; TCP Header Length = 5 x 4 byte = 20 byte
    tcp_header_length = tcp->offset << 2; //SHL 2 -> *4 multiply

    //calculate payload offset and length
    payload_offset = ETH_HLEN + ip_header_length + tcp_header_length;
    payload_length = ip->tlen - ip_header_length - tcp_header_length;

    //http://stackoverflow.com/questions/25047905/http-request-minimum-size-in-bytes
    //minimum length of http request is always greater than 7 bytes
    //avoid invalid access memory
    //include empty payload
    if (payload_length < 7) {
        goto DROP;
    }

    //load first 7 bytes of payload into payload_array
    //direct access to skb not allowed
    //the loop has a constant trip count (0..6); only the load offset is dynamic
    unsigned long payload_array[7];
    int i = 0;
    for (i = 0; i < 7; i++) {
        payload_array[i] = load_byte(skb, payload_offset + i);
    }

    //find a match with an HTTP message
    //HTTP
    if ((payload_array[0] == 'H') && (payload_array[1] == 'T') && (payload_array[2] == 'T') && (payload_array[3] == 'P')) {
        goto KEEP;
    }
    //GET
    if ((payload_array[0] == 'G') && (payload_array[1] == 'E') && (payload_array[2] == 'T')) {
        goto KEEP;
    }
    //POST
    if ((payload_array[0] == 'P') && (payload_array[1] == 'O') && (payload_array[2] == 'S') && (payload_array[3] == 'T')) {
        goto KEEP;
    }
    //PUT
    if ((payload_array[0] == 'P') && (payload_array[1] == 'U') && (payload_array[2] == 'T')) {
        goto KEEP;
    }
    //DELETE
    if ((payload_array[0] == 'D') && (payload_array[1] == 'E') && (payload_array[2] == 'L') && (payload_array[3] == 'E') && (payload_array[4] == 'T') && (payload_array[5] == 'E')) {
        goto KEEP;
    }
    //HEAD
    if ((payload_array[0] == 'H') && (payload_array[1] == 'E') && (payload_array[2] == 'A') && (payload_array[3] == 'D')) {
        goto KEEP;
    }

    //no HTTP match
    goto DROP;

    //keep the packet and send it to userspace returning -1
    KEEP:
    return -1;

    //drop the packet returning 0
    DROP:
    return 0;
}
#!/usr/bin/python
#
#Bertrone Matteo - Polytechnic of Turin
#November 2015
#
#eBPF application that parses HTTP packets
#and extracts (and prints on screen) the URL contained in the GET/POST request.
#
#eBPF program http_filter is used as SOCKET_FILTER attached to eth0 interface.
#only packet of type ip and tcp containing HTTP GET/POST are returned to userspace, others dropped
#
#python script uses bcc BPF Compiler Collection by iovisor (https://github.com/iovisor/bcc)
#and prints on stdout the first line of the HTTP GET/POST request containing the url
from __future__ import print_function
from bcc import BPF
import sys
import socket
import os
# initialize BPF - load source code from http-parse.c
bpf = BPF(src_file = "http-parse.c",debug = 0)

#load eBPF program http_filter of type SOCKET_FILTER into the kernel eBPF vm
#more info about eBPF program types
#http://man7.org/linux/man-pages/man2/bpf.2.html
function_http_filter = bpf.load_func("http_filter", BPF.SOCKET_FILTER)

#create raw socket, bind it to eth0
#attach bpf program to socket created
BPF.attach_raw_socket(function_http_filter, "eth0")

#get file descriptor of the socket previously created inside BPF.attach_raw_socket
socket_fd = function_http_filter.sock

#create python socket object, from the file descriptor
sock = socket.fromfd(socket_fd,socket.PF_PACKET,socket.SOCK_RAW,socket.IPPROTO_IP)
#set it as blocking socket
sock.setblocking(True)

#ethernet header length
ETH_HLEN = 14

while 1:
    #retrieve raw packet from socket
    packet_str = os.read(socket_fd,2048)

    #DEBUG - print raw packet in hex format
    #packet_hex = toHex(packet_str)
    #print ("%s" % packet_hex)

    #convert packet into bytearray
    packet_bytearray = bytearray(packet_str)

    #IP HEADER
    #https://tools.ietf.org/html/rfc791
    #    0                   1                   2                   3
    #    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    #   |Version|  IHL  |Type of Service|          Total Length         |
    #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    #
    #IHL : Internet Header Length is the length of the internet header
    #value to multiply * 4 byte
    #e.g. IHL = 5 ; IP Header Length = 5 * 4 byte = 20 byte
    #
    #Total Length: This 16-bit field defines the entire packet size,
    #including header and data, in bytes.

    #calculate packet total length
    #(not needed to print the request line below; kept for reference)
    total_length = packet_bytearray[ETH_HLEN + 2]                 #load MSB
    total_length = total_length << 8                              #shift MSB
    total_length = total_length + packet_bytearray[ETH_HLEN + 3]  #add LSB

    #calculate ip header length
    ip_header_length = packet_bytearray[ETH_HLEN]                 #load Byte
    ip_header_length = ip_header_length & 0x0F                    #mask bits 0..3
    ip_header_length = ip_header_length << 2                      #shift to obtain length

    #TCP HEADER
    #https://www.rfc-editor.org/rfc/rfc793.txt
    #bytes 12..15 of the TCP header:
    #    0                   1                   2                   3
    #    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    #   |  Data |           |U|A|P|R|S|F|                               |
    #   | Offset| Reserved  |R|C|S|S|Y|I|            Window             |
    #   |       |           |G|K|H|T|N|N|                               |
    #   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    #
    #Data Offset: This indicates where the data begins.
    #The TCP header is an integral number of 32 bits long.
    #value to multiply * 4 byte
    #e.g. DataOffset = 5 ; TCP Header Length = 5 * 4 byte = 20 byte

    #calculate tcp header length
    tcp_header_length = packet_bytearray[ETH_HLEN + ip_header_length + 12]  #load Byte
    tcp_header_length = tcp_header_length & 0xF0                            #mask bit 4..7
    tcp_header_length = tcp_header_length >> 2                              #SHR 4 ; SHL 2 -> SHR 2

    #calculate payload offset
    payload_offset = ETH_HLEN + ip_header_length + tcp_header_length

    #print first line of the HTTP GET/POST request
    #line ends with 0x0D 0x0A (\r\n)
    #(if we want to print all the header print until \r\n\r\n)
    #start exactly at payload_offset: one byte earlier is still the TCP header
    packet_end = len(packet_bytearray)
    for i in range(payload_offset, packet_end):
        #stop on the \r of the terminating \r\n, so the \r itself is not printed
        if (packet_bytearray[i] == 0x0D and i + 1 < packet_end
                and packet_bytearray[i + 1] == 0x0A):
            break
        print ("%c" % chr(packet_bytearray[i]), end = "")
    print("")
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment