1 #### This file contains configuration parameters for the Solarflare 2 #### network adapter driver "sfxge" 3 4 ################################################################################ 5 #### HOW TO MAKE CHANGES TO THIS FILE TAKE EFFECT 6 ################################################################################ 7 #### In order for changes made to this file to be used by the driver either: 8 #### i) reboot 9 #### OR 10 #### ii) Unplumb all sfxge inet,inet6 interfaces or link aggregations: e.g. 11 #### $ ifconfig sfxge0 unplumb 12 #### $ ifconfig sfxge0 inet6 unplumb 13 #### Delete any link aggregations that are set up (see dladm show-aggr): 14 #### $ dladm delete-aggr <key> 15 #### Update the driver config 16 #### $ update_drv sfxge 17 #### Note update_drv will unload then reload the driver, otherwise it warns: 18 #### "Cannot unload module: sfxge" 19 #### Then plumb interfaces and reconfigure IP addresses as required 20 #### 21 ################################################################################ 22 23 ################################################################################ 24 #### PER PORT SETTINGS 25 ################################################################################ 26 #### Note that it is possible to have different settings per network port 27 #### First using the file "/etc/path_to_inst" you can identify some information 28 #### 29 #### $ grep sfxge /etc/path_to_inst 30 #### # "/pci@3a,0/pci8086,340c@5/pci1924,6201@0" 4 "sfxge" 31 #### # "/pci@3a,0/pci8086,340c@5/pci1924,6201@0,1" 5 "sfxge" 32 #### # ^^^^^^^^^^^^^^^^^^^^^^^^ ^^^ 33 #### # parent unit-address 34 #### 35 #### Use the information gathered above to create entries in this file 36 #### based on the example below; this tries to allocate 4 MSI-X interrupts 37 #### to port 0 and 8 MSI-X interrupts to port 1 38 #### 39 #### name="sfxge" parent="/pci@3a,0/pci8086,340c@5" unit-address="0" 40 #### rx_scale_count=4; 41 #### name="sfxge" 
parent="/pci@3a,0/pci8086,340c@5" unit-address="0,1" 42 #### rx_scale_count=8; 43 ################################################################################ 44 45 ################################################################################ 46 #### RECEIVE SIDE SCALING 47 ################################################################################ 48 #### Maximum number of RSS-channels to use per port (default 0, global max 32) 49 #### This spreads RX flows across CPUs using multiple MSI-X interrupts 50 #### 51 #### - A value of 0 tries to allocate the same number of RSS-channels as logical 52 #### CPUs in order to provide the best spreading across the entire system. 53 #### 54 #### - actual number may be lower due to availability of MSI-X interrupts 55 #### 56 #### - to successfully use >2 MSI-X interrupts per port you also need to use 57 #### "set ddi_msix_alloc_limit=8" (maximum) in /etc/system and reboot 58 #### OR temporarily adjust this limit using mdb and then reload the driver 59 #### # mdb -kw 60 #### > ddi_msix_alloc_limit/W 8 61 #### ddi_msix_alloc_limit: 0x2 = 0x8 62 #### > ddi_msix_alloc_limit/X 63 #### ddi_msix_alloc_limit: 64 #### ddi_msix_alloc_limit: 8 65 #### 66 #### - Note that the total number of MSI-X interrupt vectors assigned to all network 67 #### drivers (in IPL 6; interrupt priority level) is 32 68 #### See the "INTERRUPT PRIORITY" section for a way to overcome this 69 #### 70 #### - You can see the current allocation with this command, run as root: 71 #### $ echo "::interrupts -d" | mdb -k | egrep 'IRQ| 6 ' 72 #### IRQ Vector IPL Bus Type CPU Share APIC/INT# Driver Name(s) 73 #### 93 0x60 6 MSI-X 2 1 - sfxge#0 74 #### 94 0x61 6 MSI-X 3 1 - sfxge#0 75 #### ^^^ 76 #### 77 #### - If the sysadmin does not take care to partition the interrupts between 78 #### the network ports then later ports could use a single legacy interrupt 79 #### 
no RSS, significantly reducing performance 80 #### See PER PORT SETTINGS ABOVE for how to apply rx_scale_count per port 81 #### 82 #### - rx_scale_count can be manipulated dynamically via ndd but the number 83 #### of interrupts will stay the same; though only a subset will fire for RX. 84 #### All interrupts will still see ~10 interrupts/sec 85 #### 86 #### - note that the number of RSS-channels can be set to be greater than the 87 #### number of logical CPUs. This can be useful to prevent some types of RX 88 #### drops 89 90 rx_scale_count=0; 91 92 ################################################################################ 93 #### USING NON_DEFAULT INTERRUPT PRIORITY LEVEL (IPL) 94 ################################################################################ 95 #### To overcome the Solaris limitation of 32 MSI-X vectors per IPL 96 #### it is possible to set this driver to allocate interrupts in a different IPL 97 #### Please only do this if necessary 98 #### 99 #### See http://download.oracle.com/docs/cd/E20815_01/html/E20816/gjtag.html 100 #### Verify with: 101 #### $ echo "::interrupts -d" | mdb -k | egrep 'IRQ|sfxge' 102 #### IRQ Vector IPL Bus Type CPU Share APIC/INT# Driver Name(s) 103 #### 93 0x60 5 MSI-X 2 1 - sfxge#0 104 #### 94 0x61 5 MSI-X 3 1 - sfxge#0 105 #### ^^^ 106 #### 107 #### (default IPL 6; the document referenced recommends 5 or 4) 108 109 #interrupt-priorities=5; 110 111 ################################################################################ 112 #### RX DMA RING SIZE 113 ################################################################################ 114 #### Number of entries in the per receive queue hardware DMA ring 115 #### 116 #### Increase this value if you see increasing values of "rx_nodesc_drop_cnt" 117 #### as reported by "kstat -m sfxge -c mac -s rx_nodesc_drop_cnt" and you think 118 #### the traffic is bursty 119 #### 120 #### Increasing this value can increase driver memory usage and reduce the 121 #### cache-hit 
rates seen for RX buffers so the largest value is not 122 #### necessarily the best 123 #### 124 #### (minimum 512) 125 #### (default 1024) 126 #### (maximum 4096) 127 #### The value must be a power of two else it will be ignored. 128 129 #rxq_size=1024; 130 131 ################################################################################ 132 #### LARGE RECEIVE OFFLOAD 133 ################################################################################ 134 #### Coalesce RX packets (aka Large Receive Offload) 135 #### 0 => off (default) 136 #### 1 => on 137 #### 2 => on, respecting TCP PSH boundaries 138 139 #rx_coalesce_mode=1; 140 141 ################################################################################ 142 #### INTERRUPT MODERATION 143 ################################################################################ 144 #### Interrupt moderation in us (default 30, minimum 0) 145 #### Decreasing this reduces latency but increases interrupt rate and therefore 146 #### CPU usage which will decrease maximum bandwidth achievable 147 #### 148 #### Decrease this value if you see increasing values of "rx_nodesc_drop_cnt" 149 #### as reported by "kstat -m sfxge -c mac -s rx_nodesc_drop_cnt" so that 150 #### the driver has more opportunities to refill the hardware RX descriptor 151 #### ring 152 #### 153 #### (maximum 20000 us) 154 #### (default 30 us) 155 156 #intr_moderation=10; 157 158 ################################################################################ 159 #### MAXIMUM MTU 160 ################################################################################ 161 #### Maximum MTU of an sfxge interface (in bytes - excludes ethernet framing) 162 #### Increase this value to support Jumbo frames 163 #### 164 #### NB increasing this value consumes more memory for all RX buffers 165 #### even for network interfaces not configured with high MTUs 166 #### 167 #### MTUs over 3968 mean that >1 contiguous 4k page of memory are allocated 168 #### for all 
receive buffers. Contiguous pages of memory can be harder for 169 #### the OS to allocate when under memory pressure. 170 #### 171 #### In Solaris 11 the MTU can be dynamically changed so you should not 172 #### need to alter the setting below. A non-default MTU can be persistently 173 #### configured depending on your network configuration: 174 #### 1) For manual mode or using NWAM with the DefaultFixed profile: 175 #### dladm set-linkprop -p mtu=<MTU> sfxge<n> 176 #### 2) If using NWAM using a custom profile please set the property link-mtu: 177 #### netcfg "select ncp myprofile; select ncu phys sfxge0; set link-mtu=9000" 178 #### 179 #### (maximum 9000) 180 #### (default 1500) 181 182 #mtu=1500; 183 184 ################################################################################ 185 #### PER PORT MEMORY LIMIT FOR RX PACKET BUFFERS 186 ################################################################################ 187 #### Limit max memory for RX packets allocated per instance (port) in bytes 188 #### 189 #### RX packets are passed up from the driver to the kernel TCP/IP stack 190 #### and freed after the data is delivered to the application socket 191 #### buffers. If the OS falls behind this allocation can grow. 192 #### 193 #### This parameter can help make behaviour reasonable when approaching 194 #### an overload condition. Other ways to avoid overload would be RSS 195 #### 196 #### The size of each buffer and the number in use can be seen with 197 #### echo "::kmastat" | mdb -k 198 #### OR kstat -c kmem_cache -n sfxgeX_rx_packet_cache # replace X 199 #### 200 #### (default unlimited when unset) 201 #### 202 #### Note: Solarflare driver package versions 3.1.x.xxxx and earlier set 203 #### the RX buffering limit to 1073741824 (1GB) per interface in this file. 204 #### 205 #### 64MB of RX buffering per interface is still oversized for nearly all 206 #### applications but provides a hard limit. 
207 208 rx_pkt_mem_max=67108864; 209 210 ################################################################################ 211 #### PREALLOCATION OF RX PACKET BUFFERS 212 ################################################################################ 213 #### Number of rx packet buffers to allocate at start of an rxq and keep a 214 #### free packet pool of at least this many. 215 #### 216 #### Keeping a free packet pool of rx packet buffers means we do not need to 217 #### repeatedly allocate and map dma buffers. 218 #### 219 #### (minimum 0 => off) 220 #### (default 0 => off) 221 #### (maximum is limited by available memory) 222 223 #rx_prealloc_pkt_buffers=512; 224 225 ################################################################################ 226 #### ACTION ON HARDWARE ERRORS 227 ################################################################################ 228 #### This parameter controls the action taken on a hardware error 229 #### which may be a PCIe error or the driver detecting unexpected behaviour 230 #### from the hardware 231 #### 232 #### Currently this driver does not report errors into the Solaris fault 233 #### management architecture (but the PCIe root-port may do so) 234 #### 235 #### 0 => recover the server adapter to a working state 236 #### 1 => do not advertise to the kernel that the link is down during the reset 237 #### 2 => reset the hardware, but do not attempt to use it again. 238 #### this is useful if you have a failover mechanism, and want to ensure 239 #### that this server adapter does not become the active link again 240 #### note that the interface will stay plumbed but will not pass traffic 241 #### 242 #### All of the above log a message and increment a kstat counter viewable with: 243 #### kstat -m sfxge -c mon -s num_restarts_hw_err 244 #### 245 #### (default 0 => recover) 246 247 action_on_hw_err=0; 248