Merge branch 'af_packet-timestamp'

Daniel Borkmann says:

====================
This is a joint effort with Willem to bring optional i) tx hw/sw
timestamping into PF_PACKET, that was reported by Paul Chavent,
and ii) to expose the type of timestamp to the user, which is in
the current situation not possible to distinguish with the RX_RING
and TX_RING API (but distinguishable through the normal timestamping
API), reported by Richard Cochran. This set is based on top of
``packet: account statistics only in tpacket_stats_u''. Related
discussion can be found in: http://patchwork.ozlabs.org/patch/238125/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2013-04-25 01:22:53 -04:00
commit 660f7d2229
4 changed files with 122 additions and 45 deletions

View File

@ -1016,10 +1016,11 @@ retry_block:
-------------------------------------------------------------------------------
The PACKET_TIMESTAMP setting determines the source of the timestamp in
the packet meta information. If your NIC is capable of timestamping
packets in hardware, you can request those hardware timestamps to used.
Note: you may need to enable the generation of hardware timestamps with
SIOCSHWTSTAMP.
the packet meta information for mmap(2)ed RX_RING and TX_RINGs. If your
NIC is capable of timestamping packets in hardware, you can request those
hardware timestamps to be used. Note: you may need to enable the generation
of hardware timestamps with SIOCSHWTSTAMP (see related information from
Documentation/networking/timestamping.txt).
PACKET_TIMESTAMP accepts the same integer bit field as
SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE
@ -1031,8 +1032,36 @@ SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.
req |= SOF_TIMESTAMPING_SYS_HARDWARE;
setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
If PACKET_TIMESTAMP is not set, a software timestamp generated inside
the networking stack is used (the behavior before this setting was added).
For the mmap(2)ed ring buffers, such timestamps are stored in the
tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine
what kind of timestamp has been reported, the tp_status field is binary |'ed
with the following possible bits ...
TP_STATUS_TS_SYS_HARDWARE
TP_STATUS_TS_RAW_HARDWARE
TP_STATUS_TS_SOFTWARE
... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the
RX_RING, if none of those 3 are set (i.e. PACKET_TIMESTAMP is not set),
then this means that a software fallback was invoked *within* PF_PACKET's
processing code (less precise).
Getting timestamps for the TX_RING works as follows: i) fill the ring frames,
ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant
frames to be updated resp. the frame handed over to the application, iv) walk
through the frames to pick up the individual hw/sw timestamps.
Only (!) if transmit timestamping is enabled, then these bits are combined
with binary | with TP_STATUS_AVAILABLE, so you must check for that in your
application (e.g. !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
in a first step to see if the frame belongs to the application, and then
one can extract the type of timestamp in a second step from tp_status)!
If you don't care about them, thus having it disabled, checking for
TP_STATUS_AVAILABLE resp. TP_STATUS_WRONG_FORMAT is sufficient. If in the
TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec
members do not contain a valid value. For TX_RINGs, by default no timestamp
is generated!
See include/linux/net_tstamp.h and Documentation/networking/timestamping
for more information on hardware timestamps.

View File

@ -86,19 +86,24 @@ struct tpacket_auxdata {
};
/* Rx ring - header status */
#define TP_STATUS_KERNEL 0x0
#define TP_STATUS_USER 0x1
#define TP_STATUS_COPY 0x2
#define TP_STATUS_LOSING 0x4
#define TP_STATUS_CSUMNOTREADY 0x8
#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */
#define TP_STATUS_BLK_TMO 0x20
#define TP_STATUS_KERNEL 0
#define TP_STATUS_USER (1 << 0)
#define TP_STATUS_COPY (1 << 1)
#define TP_STATUS_LOSING (1 << 2)
#define TP_STATUS_CSUMNOTREADY (1 << 3)
#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */
#define TP_STATUS_BLK_TMO (1 << 5)
/* Tx ring - header status */
#define TP_STATUS_AVAILABLE 0x0
#define TP_STATUS_SEND_REQUEST 0x1
#define TP_STATUS_SENDING 0x2
#define TP_STATUS_WRONG_FORMAT 0x4
#define TP_STATUS_AVAILABLE 0
#define TP_STATUS_SEND_REQUEST (1 << 0)
#define TP_STATUS_SENDING (1 << 1)
#define TP_STATUS_WRONG_FORMAT (1 << 2)
/* Rx and Tx ring - header status */
#define TP_STATUS_TS_SOFTWARE (1 << 29)
#define TP_STATUS_TS_SYS_HARDWARE (1 << 30)
#define TP_STATUS_TS_RAW_HARDWARE (1 << 31)
/* Rx ring - feature request bits */
#define TP_FT_REQ_FILL_RXHASH 0x1

View File

@ -3327,12 +3327,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
if (!sk)
return;
skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb)
return;
if (hwtstamps) {
*skb_hwtstamps(skb) =
*skb_hwtstamps(orig_skb) =
*hwtstamps;
} else {
/*
@ -3340,9 +3336,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
* so keep the shared tx_flags and only
* store software time stamp
*/
skb->tstamp = ktime_get_real();
orig_skb->tstamp = ktime_get_real();
}
skb = skb_clone(orig_skb, GFP_ATOMIC);
if (!skb)
return;
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;

View File

@ -339,6 +339,59 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
}
}
static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
unsigned int flags)
{
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
if (shhwtstamps) {
if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
return TP_STATUS_TS_SYS_HARDWARE;
if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
return TP_STATUS_TS_RAW_HARDWARE;
}
if (ktime_to_timespec_cond(skb->tstamp, ts))
return TP_STATUS_TS_SOFTWARE;
return 0;
}
static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
struct sk_buff *skb)
{
union tpacket_uhdr h;
struct timespec ts;
__u32 ts_status;
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
return 0;
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
h.h1->tp_sec = ts.tv_sec;
h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
break;
case TPACKET_V2:
h.h2->tp_sec = ts.tv_sec;
h.h2->tp_nsec = ts.tv_nsec;
break;
case TPACKET_V3:
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
}
/* one flush is safe, as both fields always lie on the same cacheline */
flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
smp_wmb();
return ts_status;
}
static void *packet_lookup_frame(struct packet_sock *po,
struct packet_ring_buffer *rb,
unsigned int position,
@ -1657,26 +1710,6 @@ drop:
return 0;
}
static void tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
unsigned int flags)
{
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
if (shhwtstamps) {
if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
return;
if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
return;
}
if (ktime_to_timespec_cond(skb->tstamp, ts))
return;
getnstimeofday(ts);
}
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
@ -1691,6 +1724,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
unsigned short macoff, netoff, hdrlen;
struct sk_buff *copy_skb = NULL;
struct timespec ts;
__u32 ts_status;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@ -1773,7 +1807,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
spin_unlock(&sk->sk_receive_queue.lock);
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
tpacket_get_timestamp(skb, &ts, po->tp_tstamp);
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
getnstimeofday(&ts);
status |= ts_status;
switch (po->tp_version) {
case TPACKET_V1:
@ -1874,10 +1912,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
if (likely(po->tx_ring.pg_vec)) {
__u32 ts;
ph = skb_shinfo(skb)->destructor_arg;
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
atomic_dec(&po->tx_ring.pending);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
}
sock_wfree(skb);
@ -1900,6 +1942,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->dev = dev;
skb->priority = po->sk.sk_priority;
skb->mark = po->sk.sk_mark;
sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
skb_shinfo(skb)->destructor_arg = ph.raw;
switch (po->tp_version) {