From cc88d534dfb7d6e07e9b6e511b6b0629eef97cee Mon Sep 17 00:00:00 2001
From: A-star-ayush <myselfthebest@yahoo.com>
Date: Wed, 26 Jul 2017 01:32:07 +0530
Subject: [PATCH] tcp: RFC 6298 & 7323: update RTO calculations and semantics
---
.../kernel/network/protocols/tcp/TCPEndpoint.cpp | 80 +++++++++++++---------
.../kernel/network/protocols/tcp/TCPEndpoint.h | 7 +-
src/add-ons/kernel/network/protocols/tcp/tcp.h | 2 +
3 files changed, 55 insertions(+), 34 deletions(-)
diff --git a/src/add-ons/kernel/network/protocols/tcp/TCPEndpoint.cpp b/src/add-ons/kernel/network/protocols/tcp/TCPEndpoint.cpp
index 0a7c5fa..0f0339a 100644
a
|
b
|
TCPEndpoint::TCPEndpoint(net_socket* socket)
|
436 | 436 | fReceiveWindow(socket->receive.buffer_size), |
437 | 437 | fReceiveMaxSegmentSize(TCP_DEFAULT_MAX_SEGMENT_SIZE), |
438 | 438 | fReceiveQueue(socket->receive.buffer_size), |
439 | | fRoundTripTime(TCP_INITIAL_RTT / kTimestampFactor), |
440 | | fRoundTripDeviation(TCP_INITIAL_RTT / kTimestampFactor), |
| 439 | fSmoothedRoundTripTime(0), |
| 440 | fRoundTripVariation(0), |
| 441 | fSendTime(0), |
441 | 442 | fRetransmitTimeout(TCP_INITIAL_RTT), |
442 | 443 | fReceivedTimestamp(0), |
443 | 444 | fCongestionWindow(0), |
… |
… |
TCPEndpoint::_SendQueued(bool force, uint32 sendWindow)
|
2124 | 2125 | return status; |
2125 | 2126 | } |
2126 | 2127 | |
| 2128 | if(fSendTime == 0 && (segmentLength != 0 || segment.flags & TCP_FLAG_SYNCHRONIZE)) |
| 2129 | fSendTime = tcp_now(); |
| 2130 | |
2127 | 2131 | if (shouldStartRetransmitTimer && size > 0) { |
2128 | 2132 | TRACE("starting initial retransmit timer of: %" B_PRIdBIGTIME, |
2129 | 2133 | fRetransmitTimeout); |
… |
… |
TCPEndpoint::_Acknowledged(tcp_segment_header& segment)
|
2211 | 2215 | if (fSendNext < fSendUnacknowledged) |
2212 | 2216 | fSendNext = fSendUnacknowledged; |
2213 | 2217 | |
2214 | | if (segment.options & TCP_HAS_TIMESTAMPS) |
2215 | | _UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply)); |
2216 | | else { |
2217 | | // TODO: Fallback to RFC 793 type estimation; This just resets |
2218 | | // any potential exponential back off that happened due to |
2219 | | // retransmits. |
2220 | | fRetransmitTimeout = TCP_INITIAL_RTT; |
| 2218 | if (fFlags & FLAG_OPTION_TIMESTAMP) { |
| 2219 | uint32 flightSize = (fSendMax - fSendUnacknowledged).Number(); |
| 2220 | _UpdateRoundTripTime(tcp_diff_timestamp(segment.timestamp_reply), |
| 2221 | 1 + ((flightSize - 1) / (fSendMaxSegmentSize << 1))); |
| 2222 | } |
| 2223 | |
| 2224 | // Karn's algorithm: RTT measurement must not be made using segments that were retransmitted |
| 2225 | else if (fSendTime > 1 && fSendNext == fSendMax) { |
| 2226 | _UpdateRoundTripTime(tcp_diff_timestamp(fSendTime), 1); |
| 2227 | fSendTime = 1; |
2221 | 2228 | } |
2222 | 2229 | |
2223 | 2230 | if (fSendUnacknowledged == fSendMax) { |
2224 | 2231 | TRACE("all acknowledged, cancelling retransmission timer"); |
2225 | 2232 | gStackModule->cancel_timer(&fRetransmitTimer); |
2226 | 2233 | T(TimerSet(this, "retransmit", -1)); |
| 2234 | |
| 2235 | fSendTime = 0; |
| 2236 | |
2227 | 2237 | } else { |
2228 | 2238 | TRACE("data acknowledged, resetting retransmission timer to: %" |
2229 | 2239 | B_PRIdBIGTIME, fRetransmitTimeout); |
… |
… |
TCPEndpoint::_Acknowledged(tcp_segment_header& segment)
|
2261 | 2271 | void |
2262 | 2272 | TCPEndpoint::_Retransmit() |
2263 | 2273 | { |
2264 | | TRACE("Retransmit()"); |
| 2274 | if (fState < ESTABLISHED) { |
| 2275 | fRetransmitTimeout = TCP_SYN_RETRANSMIT_TIMEOUT; |
| 2276 | fCongestionWindow = fSendMaxSegmentSize; |
| 2277 | } else { |
| 2278 | _ResetSlowStart(); |
2265 | 2279 | |
2266 | | _ResetSlowStart(); |
2267 | | fSendNext = fSendUnacknowledged; |
| 2280 | // Do exponential back off of the retransmit timeout |
| 2281 | fRetransmitTimeout *= 2; |
| 2282 | if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT) |
| 2283 | fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT; |
| 2284 | } |
2268 | 2285 | |
2269 | | // Do exponential back off of the retransmit timeout |
2270 | | fRetransmitTimeout *= 2; |
2271 | | if (fRetransmitTimeout > TCP_MAX_RETRANSMIT_TIMEOUT) |
2272 | | fRetransmitTimeout = TCP_MAX_RETRANSMIT_TIMEOUT; |
| 2286 | TRACE("Retransmit()"); |
2273 | 2287 | |
| 2288 | fSendNext = fSendUnacknowledged; |
2274 | 2289 | _SendQueued(); |
2275 | 2290 | } |
2276 | 2291 | |
2277 | 2292 | |
2278 | 2293 | void |
2279 | | TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime) |
| 2294 | TCPEndpoint::_UpdateRoundTripTime(int32 roundTripTime, uint32 expectedSamples) |
2280 | 2295 | { |
2281 | | int32 rtt = roundTripTime; |
2282 | | |
2283 | | // "smooth" round trip time as per Van Jacobson |
2284 | | rtt -= fRoundTripTime / 8; |
2285 | | fRoundTripTime += rtt; |
2286 | | if (rtt < 0) |
2287 | | rtt = -rtt; |
2288 | | rtt -= fRoundTripDeviation / 4; |
2289 | | fRoundTripDeviation += rtt; |
| 2296 | if(fSmoothedRoundTripTime == 0) { |
| 2297 | fSmoothedRoundTripTime = roundTripTime; |
| 2298 | fRoundTripVariation = roundTripTime >> 1; |
| 2299 | fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation << 2)) |
| 2300 | * kTimestampFactor; |
| 2301 | } else { |
| 2302 | int32 delta = fSmoothedRoundTripTime - roundTripTime; |
| 2303 | if (delta < 0) |
| 2304 | delta = -delta; |
| 2305 | fRoundTripVariation += ((delta - fRoundTripVariation) >> 2) / expectedSamples; |
| 2306 | fSmoothedRoundTripTime += ((roundTripTime - fSmoothedRoundTripTime) >> 3) / expectedSamples; |
| 2307 | fRetransmitTimeout = (fSmoothedRoundTripTime + max_c(100, fRoundTripVariation << 2)) |
| 2308 | * kTimestampFactor; |
| 2309 | } |
2290 | 2310 | |
2291 | | fRetransmitTimeout = ((fRoundTripTime / 4 + fRoundTripDeviation) / 2) |
2292 | | * kTimestampFactor; |
2293 | 2311 | if (fRetransmitTimeout < TCP_MIN_RETRANSMIT_TIMEOUT) |
2294 | 2312 | fRetransmitTimeout = TCP_MIN_RETRANSMIT_TIMEOUT; |
2295 | 2313 | |
… |
… |
TCPEndpoint::_RetransmitTimer(net_timer* timer, void* _endpoint)
|
2317 | 2335 | T(TimerTriggered(endpoint, "retransmit")); |
2318 | 2336 | |
2319 | 2337 | MutexLocker locker(endpoint->fLock); |
2320 | | if (!locker.IsLocked()) |
| 2338 | if (!locker.IsLocked() || gStackModule->is_timer_active(timer)) |
2321 | 2339 | return; |
2322 | 2340 | |
2323 | 2341 | endpoint->_Retransmit(); |
… |
… |
TCPEndpoint::Dump() const
|
2445 | 2463 | fInitialReceiveSequence.Number()); |
2446 | 2464 | kprintf(" duplicate acknowledge count: %" B_PRIu32 "\n", |
2447 | 2465 | fDuplicateAcknowledgeCount); |
2448 | | kprintf(" round trip time: %" B_PRId32 " (deviation %" B_PRId32 ")\n", |
2449 | | fRoundTripTime, fRoundTripDeviation); |
| 2466 | kprintf(" smoothed round trip time: %" B_PRId32 " (variation %" B_PRId32 ")\n", |
| 2467 | fSmoothedRoundTripTime, fRoundTripVariation); |
2450 | 2468 | kprintf(" retransmit timeout: %" B_PRId64 "\n", fRetransmitTimeout); |
2451 | 2469 | kprintf(" congestion window: %" B_PRIu32 "\n", fCongestionWindow); |
2452 | 2470 | kprintf(" slow start threshold: %" B_PRIu32 "\n", fSlowStartThreshold); |
diff --git a/src/add-ons/kernel/network/protocols/tcp/TCPEndpoint.h b/src/add-ons/kernel/network/protocols/tcp/TCPEndpoint.h
index 1ff167b..0aff429 100644
a
|
b
|
private:
|
106 | 106 | status_t _PrepareSendPath(const sockaddr* peer); |
107 | 107 | void _Acknowledged(tcp_segment_header& segment); |
108 | 108 | void _Retransmit(); |
109 | | void _UpdateRoundTripTime(int32 roundTripTime); |
| 109 | void _UpdateRoundTripTime(int32 roundTripTime, uint32 expectedSamples); |
110 | 110 | void _ResetSlowStart(); |
111 | 111 | void _DuplicateAcknowledge(tcp_segment_header& segment); |
112 | 112 | |
… |
… |
private:
|
164 | 164 | tcp_sequence fInitialReceiveSequence; |
165 | 165 | |
166 | 166 | // round trip time and retransmit timeout computation |
167 | | int32 fRoundTripTime; |
168 | | int32 fRoundTripDeviation; |
| 167 | int32 fSmoothedRoundTripTime; |
| 168 | int32 fRoundTripVariation; |
| 169 | uint32 fSendTime; |
169 | 170 | bigtime_t fRetransmitTimeout; |
170 | 171 | |
171 | 172 | uint32 fReceivedTimestamp; |
diff --git a/src/add-ons/kernel/network/protocols/tcp/tcp.h b/src/add-ons/kernel/network/protocols/tcp/tcp.h
index 6f30ec2..24890a3 100644
a
|
b
|
operator==(tcp_sequence a, tcp_sequence b)
|
193 | 193 | #define TCP_MIN_RETRANSMIT_TIMEOUT 200000 // 200 msecs |
194 | 194 | // Maximum retransmit timeout (per RFC6298) |
195 | 195 | #define TCP_MAX_RETRANSMIT_TIMEOUT 60000000 // 60 secs |
| 196 | // Retransmit timeout applied when a SYN is lost (RFC 6298) |
| 197 | #define TCP_SYN_RETRANSMIT_TIMEOUT 3000000 // 3 secs |
196 | 198 | |
197 | 199 | struct tcp_sack { |
198 | 200 | uint32 left_edge; |