Merge branch 'af_packet-timestamp'
Daniel Borkmann says: ==================== This is a joint effort with Willem to bring optional i) tx hw/sw timestamping into PF_PACKET, that was reported by Paul Chavent, and ii) to expose the type of timestamp to the user, which is in the current situation not possible to distinguish with the RX_RING and TX_RING API (but distinguishable through the normal timestamping API), reported by Richard Cochran. This set is based on top of ``packet: account statistics only in tpacket_stats_u''. Related discussion can be found in: http://patchwork.ozlabs.org/patch/238125/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
660f7d2229
|
@ -1016,10 +1016,11 @@ retry_block:
|
|||
-------------------------------------------------------------------------------
|
||||
|
||||
The PACKET_TIMESTAMP setting determines the source of the timestamp in
|
||||
the packet meta information. If your NIC is capable of timestamping
|
||||
packets in hardware, you can request those hardware timestamps to used.
|
||||
Note: you may need to enable the generation of hardware timestamps with
|
||||
SIOCSHWTSTAMP.
|
||||
the packet meta information for mmap(2)ed RX_RING and TX_RINGs. If your
|
||||
NIC is capable of timestamping packets in hardware, you can request those
|
||||
hardware timestamps to be used. Note: you may need to enable the generation
|
||||
of hardware timestamps with SIOCSHWTSTAMP (see related information from
|
||||
Documentation/networking/timestamping.txt).
|
||||
|
||||
PACKET_TIMESTAMP accepts the same integer bit field as
|
||||
SO_TIMESTAMPING. However, only the SOF_TIMESTAMPING_SYS_HARDWARE
|
||||
|
@ -1031,8 +1032,36 @@ SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.
|
|||
req |= SOF_TIMESTAMPING_SYS_HARDWARE;
|
||||
setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
|
||||
|
||||
If PACKET_TIMESTAMP is not set, a software timestamp generated inside
|
||||
the networking stack is used (the behavior before this setting was added).
|
||||
For the mmap(2)ed ring buffers, such timestamps are stored in the
|
||||
tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine
|
||||
what kind of timestamp has been reported, the tp_status field is binary OR'ed
|
||||
with the following possible bits ...
|
||||
|
||||
TP_STATUS_TS_SYS_HARDWARE
|
||||
TP_STATUS_TS_RAW_HARDWARE
|
||||
TP_STATUS_TS_SOFTWARE
|
||||
|
||||
... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the
|
||||
RX_RING, if none of those 3 are set (i.e. PACKET_TIMESTAMP is not set),
|
||||
then this means that a software fallback was invoked *within* PF_PACKET's
|
||||
processing code (less precise).
|
||||
|
||||
Getting timestamps for the TX_RING works as follows: i) fill the ring frames,
|
||||
ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant
|
||||
frames to be updated or the frame to be handed over to the application, iv) walk
|
||||
through the frames to pick up the individual hw/sw timestamps.
|
||||
|
||||
Only (!) if transmit timestamping is enabled are these bits combined
|
||||
with binary | with TP_STATUS_AVAILABLE, so you must check for that in your
|
||||
application (e.g. !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
|
||||
in a first step to see if the frame belongs to the application, and then
|
||||
one can extract the type of timestamp in a second step from tp_status)!
|
||||
|
||||
If you don't care about timestamps and thus leave them disabled, checking for
|
||||
TP_STATUS_AVAILABLE or TP_STATUS_WRONG_FORMAT is sufficient. If in the
|
||||
TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec
|
||||
members do not contain a valid value. For TX_RINGs, by default no timestamp
|
||||
is generated!
|
||||
|
||||
See include/linux/net_tstamp.h and Documentation/networking/timestamping
|
||||
for more information on hardware timestamps.
|
||||
|
|
|
@ -86,19 +86,24 @@ struct tpacket_auxdata {
|
|||
};
|
||||
|
||||
/* Rx ring - header status */
|
||||
#define TP_STATUS_KERNEL 0x0
|
||||
#define TP_STATUS_USER 0x1
|
||||
#define TP_STATUS_COPY 0x2
|
||||
#define TP_STATUS_LOSING 0x4
|
||||
#define TP_STATUS_CSUMNOTREADY 0x8
|
||||
#define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */
|
||||
#define TP_STATUS_BLK_TMO 0x20
|
||||
#define TP_STATUS_KERNEL 0
|
||||
#define TP_STATUS_USER (1 << 0)
|
||||
#define TP_STATUS_COPY (1 << 1)
|
||||
#define TP_STATUS_LOSING (1 << 2)
|
||||
#define TP_STATUS_CSUMNOTREADY (1 << 3)
|
||||
#define TP_STATUS_VLAN_VALID (1 << 4) /* auxdata has valid tp_vlan_tci */
|
||||
#define TP_STATUS_BLK_TMO (1 << 5)
|
||||
|
||||
/* Tx ring - header status */
|
||||
#define TP_STATUS_AVAILABLE 0x0
|
||||
#define TP_STATUS_SEND_REQUEST 0x1
|
||||
#define TP_STATUS_SENDING 0x2
|
||||
#define TP_STATUS_WRONG_FORMAT 0x4
|
||||
#define TP_STATUS_AVAILABLE 0
|
||||
#define TP_STATUS_SEND_REQUEST (1 << 0)
|
||||
#define TP_STATUS_SENDING (1 << 1)
|
||||
#define TP_STATUS_WRONG_FORMAT (1 << 2)
|
||||
|
||||
/* Rx and Tx ring - header status */
|
||||
#define TP_STATUS_TS_SOFTWARE (1 << 29)
|
||||
#define TP_STATUS_TS_SYS_HARDWARE (1 << 30)
|
||||
#define TP_STATUS_TS_RAW_HARDWARE (1 << 31)
|
||||
|
||||
/* Rx ring - feature request bits */
|
||||
#define TP_FT_REQ_FILL_RXHASH 0x1
|
||||
|
|
|
@ -3327,12 +3327,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
|
|||
if (!sk)
|
||||
return;
|
||||
|
||||
skb = skb_clone(orig_skb, GFP_ATOMIC);
|
||||
if (!skb)
|
||||
return;
|
||||
|
||||
if (hwtstamps) {
|
||||
*skb_hwtstamps(skb) =
|
||||
*skb_hwtstamps(orig_skb) =
|
||||
*hwtstamps;
|
||||
} else {
|
||||
/*
|
||||
|
@ -3340,9 +3336,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
|
|||
* so keep the shared tx_flags and only
|
||||
* store software time stamp
|
||||
*/
|
||||
skb->tstamp = ktime_get_real();
|
||||
orig_skb->tstamp = ktime_get_real();
|
||||
}
|
||||
|
||||
skb = skb_clone(orig_skb, GFP_ATOMIC);
|
||||
if (!skb)
|
||||
return;
|
||||
|
||||
serr = SKB_EXT_ERR(skb);
|
||||
memset(serr, 0, sizeof(*serr));
|
||||
serr->ee.ee_errno = ENOMSG;
|
||||
|
|
|
@ -339,6 +339,59 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
|
|||
}
|
||||
}
|
||||
|
||||
static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
|
||||
|
||||
if (shhwtstamps) {
|
||||
if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
|
||||
ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
|
||||
return TP_STATUS_TS_SYS_HARDWARE;
|
||||
if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
|
||||
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
|
||||
return TP_STATUS_TS_RAW_HARDWARE;
|
||||
}
|
||||
|
||||
if (ktime_to_timespec_cond(skb->tstamp, ts))
|
||||
return TP_STATUS_TS_SOFTWARE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
union tpacket_uhdr h;
|
||||
struct timespec ts;
|
||||
__u32 ts_status;
|
||||
|
||||
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
|
||||
return 0;
|
||||
|
||||
h.raw = frame;
|
||||
switch (po->tp_version) {
|
||||
case TPACKET_V1:
|
||||
h.h1->tp_sec = ts.tv_sec;
|
||||
h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
|
||||
break;
|
||||
case TPACKET_V2:
|
||||
h.h2->tp_sec = ts.tv_sec;
|
||||
h.h2->tp_nsec = ts.tv_nsec;
|
||||
break;
|
||||
case TPACKET_V3:
|
||||
default:
|
||||
WARN(1, "TPACKET version not supported.\n");
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* one flush is safe, as both fields always lie on the same cacheline */
|
||||
flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
|
||||
smp_wmb();
|
||||
|
||||
return ts_status;
|
||||
}
|
||||
|
||||
static void *packet_lookup_frame(struct packet_sock *po,
|
||||
struct packet_ring_buffer *rb,
|
||||
unsigned int position,
|
||||
|
@ -1657,26 +1710,6 @@ drop:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
|
||||
|
||||
if (shhwtstamps) {
|
||||
if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
|
||||
ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
|
||||
return;
|
||||
if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
|
||||
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
|
||||
return;
|
||||
}
|
||||
|
||||
if (ktime_to_timespec_cond(skb->tstamp, ts))
|
||||
return;
|
||||
|
||||
getnstimeofday(ts);
|
||||
}
|
||||
|
||||
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
|
||||
struct packet_type *pt, struct net_device *orig_dev)
|
||||
{
|
||||
|
@ -1691,6 +1724,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
|
|||
unsigned short macoff, netoff, hdrlen;
|
||||
struct sk_buff *copy_skb = NULL;
|
||||
struct timespec ts;
|
||||
__u32 ts_status;
|
||||
|
||||
if (skb->pkt_type == PACKET_LOOPBACK)
|
||||
goto drop;
|
||||
|
@ -1773,7 +1807,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
|
|||
spin_unlock(&sk->sk_receive_queue.lock);
|
||||
|
||||
skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
|
||||
tpacket_get_timestamp(skb, &ts, po->tp_tstamp);
|
||||
|
||||
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
|
||||
getnstimeofday(&ts);
|
||||
|
||||
status |= ts_status;
|
||||
|
||||
switch (po->tp_version) {
|
||||
case TPACKET_V1:
|
||||
|
@ -1874,10 +1912,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
|
|||
void *ph;
|
||||
|
||||
if (likely(po->tx_ring.pg_vec)) {
|
||||
__u32 ts;
|
||||
|
||||
ph = skb_shinfo(skb)->destructor_arg;
|
||||
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
|
||||
atomic_dec(&po->tx_ring.pending);
|
||||
__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
|
||||
|
||||
ts = __packet_set_timestamp(po, ph, skb);
|
||||
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
|
||||
}
|
||||
|
||||
sock_wfree(skb);
|
||||
|
@ -1900,6 +1942,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
|
|||
skb->dev = dev;
|
||||
skb->priority = po->sk.sk_priority;
|
||||
skb->mark = po->sk.sk_mark;
|
||||
sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
|
||||
skb_shinfo(skb)->destructor_arg = ph.raw;
|
||||
|
||||
switch (po->tp_version) {
|
||||
|
|
Loading…
Reference in New Issue