[Linux-kernel-mentees] [PATCH 1/2] net: reorder members of virtnet_info for optimization

Anant Thazhemadam anant.thazhemadam at gmail.com
Wed Sep 30 05:17:21 UTC 2020


Analysis of the structure virtnet_info using pahole gives the
following stats.
	/* size: 256, cachelines: 4, members: 25 */
	/* sum members: 245, holes: 3, sum holes: 11 */
	/* paddings: 1, sum paddings: 4 */

Reordering the order in which the members of virtnet_info are declared
helps in packing byte holes in the middle of virtnet_info, reduce the
size required by the structure by 8 bytes, and also allows members to be
stored without overstepping the boundaries of a cacheline (for a
cacheline of size 64bytes) unnecessarily.

Analysis using pahole post-reordering of members gives the following
stats.
	/* size: 248, cachelines: 4, members: 25 */
        /* padding: 3 */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 56 bytes */

Signed-off-by: Anant Thazhemadam <anant.thazhemadam at gmail.com>
---
The complete analysis done by pahole can be found below.
Before the change:
		struct virtnet_info {
		struct virtio_device *     vdev;                 /*     0     8 */
		struct virtqueue *         cvq;                  /*     8     8 */
		struct net_device *        dev;                  /*    16     8 */
		struct send_queue *        sq;                   /*    24     8 */
		struct receive_queue *     rq;                   /*    32     8 */
		unsigned int               status;               /*    40     4 */
		u16                        max_queue_pairs;      /*    44     2 */
		u16                        curr_queue_pairs;     /*    46     2 */
		u16                        xdp_queue_pairs;      /*    48     2 */
		bool                       big_packets;          /*    50     1 */
		bool                       mergeable_rx_bufs;    /*    51     1 */
		bool                       has_cvq;              /*    52     1 */
		bool                       any_header_sg;        /*    53     1 */
		u8                         hdr_len;              /*    54     1 */

		/* XXX 1 byte hole, try to pack */

		struct delayed_work refill;                      /*    56    88 */

		/* XXX last struct has 4 bytes of padding */

		/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
		struct work_struct config_work;                  /*   144    32 */
		bool                       affinity_hint_set;    /*   176     1 */

		/* XXX 7 bytes hole, try to pack */

		struct hlist_node  node;                         /*   184    16 */
		/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
		struct hlist_node  node_dead;                    /*   200    16 */
		struct control_buf *       ctrl;                 /*   216     8 */
		u8                         duplex;               /*   224     1 */

		/* XXX 3 bytes hole, try to pack */

		u32                        speed;                /*   228     4 */
		long unsigned int          guest_offloads;       /*   232     8 */
		long unsigned int          guest_offloads_capable; /*   240     8 */
		struct failover *          failover;             /*   248     8 */

		/* size: 256, cachelines: 4, members: 25 */
		/* sum members: 245, holes: 3, sum holes: 11 */
		/* paddings: 1, sum paddings: 4 */
	};

After the Change:
	struct virtnet_info {
		struct virtio_device *     vdev;                 /*     0     8 */
		struct virtqueue *         cvq;                  /*     8     8 */
		struct net_device *        dev;                  /*    16     8 */
		struct send_queue *        sq;                   /*    24     8 */
		struct receive_queue *     rq;                   /*    32     8 */
		unsigned int               status;               /*    40     4 */
		u16                        max_queue_pairs;      /*    44     2 */
		u16                        curr_queue_pairs;     /*    46     2 */
		u16                        xdp_queue_pairs;      /*    48     2 */
		bool                       big_packets;          /*    50     1 */
		bool                       mergeable_rx_bufs;    /*    51     1 */
		bool                       has_cvq;              /*    52     1 */
		bool                       any_header_sg;        /*    53     1 */
		bool                       affinity_hint_set;    /*    54     1 */
		u8                         hdr_len;              /*    55     1 */
		struct control_buf *       ctrl;                 /*    56     8 */
		/* --- cacheline 1 boundary (64 bytes) --- */
		struct work_struct config_work;                  /*    64    32 */
		struct hlist_node  node;                         /*    96    16 */
		struct hlist_node  node_dead;                    /*   112    16 */
		/* --- cacheline 2 boundary (128 bytes) --- */
		long unsigned int          guest_offloads;       /*   128     8 */
		long unsigned int          guest_offloads_capable; /*   136     8 */
		struct failover *          failover;             /*   144     8 */
		struct delayed_work refill;                      /*   152    88 */

		/* XXX last struct has 4 bytes of padding */

		/* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */
		u32                        speed;                /*   240     4 */
		u8                         duplex;               /*   244     1 */

		/* size: 248, cachelines: 4, members: 25 */
		/* padding: 3 */
		/* paddings: 1, sum paddings: 4 */
		/* last cacheline: 56 bytes */
	};

It can be seen that the size has reduced by 8 bytes, and the holes have been eliminated
as well. Also, more members of virtnet_info are accomodated within one cacheline 
(without unnecessarily crossing over the cacheline boundary).


 drivers/net/virtio_net.c | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..32747f1980ae 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -137,29 +137,29 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	/* Name of this receive queue: input.$index */
+	char name[40];
+
 	struct bpf_prog __rcu *xdp_prog;
 
 	struct virtnet_rq_stats stats;
 
+	/* RX: fragments + linear part + virtio header */
+	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+	/* Page frag for packet buffer allocation. */
+	struct page_frag alloc_frag;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
 	/* Average packet length for mergeable receive buffers. */
 	struct ewma_pkt_len mrg_avg_pkt_len;
 
-	/* Page frag for packet buffer allocation. */
-	struct page_frag alloc_frag;
-
-	/* RX: fragments + linear part + virtio header */
-	struct scatterlist sg[MAX_SKB_FRAGS + 2];
+	struct xdp_rxq_info xdp_rxq;
 
 	/* Min single buffer size for mergeable buffers case. */
 	unsigned int min_buf_len;
-
-	/* Name of this receive queue: input.$index */
-	char name[40];
-
-	struct xdp_rxq_info xdp_rxq;
 };
 
 /* Control VQ buffers: protected by the rtnl lock */
@@ -202,33 +202,33 @@ struct virtnet_info {
 	/* Host can handle any s/g split between our header and packet data */
 	bool any_header_sg;
 
+	/* Does the affinity hint is set for virtqueues? */
+	bool affinity_hint_set;
+
 	/* Packet virtio header size */
 	u8 hdr_len;
 
-	/* Work struct for refilling if we run low on memory. */
-	struct delayed_work refill;
+	struct control_buf *ctrl;
 
 	/* Work struct for config space updates */
 	struct work_struct config_work;
 
-	/* Does the affinity hint is set for virtqueues? */
-	bool affinity_hint_set;
-
 	/* CPU hotplug instances for online & dead */
 	struct hlist_node node;
 	struct hlist_node node_dead;
 
-	struct control_buf *ctrl;
-
-	/* Ethtool settings */
-	u8 duplex;
-	u32 speed;
-
 	unsigned long guest_offloads;
 	unsigned long guest_offloads_capable;
 
 	/* failover when STANDBY feature enabled */
 	struct failover *failover;
+
+	/* Work struct for refilling if we run low on memory. */
+	struct delayed_work refill;
+
+	/* Ethtool settings */
+	u32 speed;
+	u8 duplex;
 };
 
 struct padded_vnet_hdr {
-- 
2.25.1



More information about the Linux-kernel-mentees mailing list