VirtualBox

source: vbox/trunk/src/VBox/Devices/Network/slirp/ip_icmpwin.c

Last change on this file was 98103, checked in by vboxsync, 16 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 14.9 KB
Line 
1/* $Id: ip_icmpwin.c 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * NAT - Windows ICMP API based ping proxy.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#include "slirp.h"
29#include "ip_icmp.h"
30
31#include <winternl.h> /* for PIO_APC_ROUTINE &c */
32#ifndef PIO_APC_ROUTINE_DEFINED
33# define PIO_APC_ROUTINE_DEFINED 1
34#endif
35#include <iprt/win/iphlpapi.h>
36#include <icmpapi.h>
37
/*
 * Header of an ICMP ECHO message.  Meant to be stored by value, unlike
 * struct icmp, which is meant to be overlaid onto a packet buffer.
 *
 * The fields are copied verbatim from the guest's request, so the
 * 16-bit members are kept in network byte order.
 */
struct icmp_echo {
    uint8_t  icmp_type;      /* ICMP message type */
    uint8_t  icmp_code;      /* ICMP message code */
    uint16_t icmp_cksum;     /* ICMP checksum */
    uint16_t icmp_echo_id;   /* echo identifier */
    uint16_t icmp_echo_seq;  /* echo sequence number */
};
49
50AssertCompileSize(struct icmp_echo, 8);
51
52
53struct pong {
54 PNATState pData;
55
56 TAILQ_ENTRY(pong) queue_entry;
57
58 union {
59 struct ip ip;
60 uint8_t au[60];
61 } reqiph;
62 struct icmp_echo reqicmph;
63
64 size_t bufsize;
65 uint8_t buf[1];
66};
67
68
69static VOID WINAPI icmpwin_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved);
70static VOID WINAPI icmpwin_callback_old(void *ctx);
71
72static void icmpwin_callback(struct pong *pong);
73static void icmpwin_pong(struct pong *pong);
74
75static struct mbuf *icmpwin_get_error(struct pong *pong, int type, int code);
76static struct mbuf *icmpwin_get_mbuf(PNATState pData, size_t reqsize);
77
78
79/*
80 * On Windows XP and Windows Server 2003 IcmpSendEcho2() callback
81 * is FARPROC, but starting from Vista it's PIO_APC_ROUTINE with
82 * two extra arguments. Callbacks use WINAPI (stdcall) calling
83 * convention with callee responsible for popping the arguments,
84 * so to avoid stack corruption we check windows version at run
85 * time and provide correct callback.
86 *
87 * XXX: this is system-wide, but what about multiple NAT threads?
88 */
89static PIO_APC_ROUTINE g_pfnIcmpCallback;
90
91
92int
93icmpwin_init(PNATState pData)
94{
95 if (g_pfnIcmpCallback == NULL)
96 {
97 OSVERSIONINFO osvi;
98 int status;
99
100 ZeroMemory(&osvi, sizeof(OSVERSIONINFO));
101 osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
102 status = GetVersionEx(&osvi);
103 if (status == 0)
104 return 1;
105
106 if (osvi.dwMajorVersion >= 6)
107 g_pfnIcmpCallback = icmpwin_callback_apc;
108 else
109 g_pfnIcmpCallback = (PIO_APC_ROUTINE)icmpwin_callback_old;
110 }
111
112 TAILQ_INIT(&pData->pongs_expected);
113 TAILQ_INIT(&pData->pongs_received);
114
115 pData->icmp_socket.sh = IcmpCreateFile();
116 pData->phEvents[VBOX_ICMP_EVENT_INDEX] = CreateEvent(NULL, FALSE, FALSE, NULL);
117
118 return 0;
119}
120
121
122void
123icmpwin_finit(PNATState pData)
124{
125 IcmpCloseHandle(pData->icmp_socket.sh);
126
127 while (!TAILQ_EMPTY(&pData->pongs_received)) {
128 struct pong *pong = TAILQ_FIRST(&pData->pongs_received);
129 TAILQ_REMOVE(&pData->pongs_received, pong, queue_entry);
130 RTMemFree(pong);
131 }
132
133 /* this should be empty */
134 while (!TAILQ_EMPTY(&pData->pongs_expected)) {
135 struct pong *pong = TAILQ_FIRST(&pData->pongs_expected);
136 TAILQ_REMOVE(&pData->pongs_expected, pong, queue_entry);
137 pong->pData = NULL;
138 }
139}
140
141
142/*
143 * Outgoing ping from guest.
144 */
145void
146icmpwin_ping(PNATState pData, struct mbuf *m, int hlen)
147{
148 struct ip *ip = mtod(m, struct ip *);
149 size_t reqsize, pongsize;
150 uint8_t ttl;
151 size_t bufsize;
152 struct pong *pong;
153 IPAddr dst;
154 IP_OPTION_INFORMATION opts;
155 void *reqdata;
156 int status;
157
158 ttl = ip->ip_ttl;
159 AssertReturnVoid(ttl > 0);
160
161 size_t hdrsize = hlen + sizeof(struct icmp_echo);
162 reqsize = ip->ip_len - hdrsize;
163
164 bufsize = sizeof(ICMP_ECHO_REPLY);
165 if (reqsize < sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo))
166 bufsize += sizeof(IO_STATUS_BLOCK) + sizeof(struct icmp_echo);
167 else
168 bufsize += reqsize;
169 bufsize += 16; /* whatever that is; empirically at least XP needs it */
170
171 pongsize = RT_UOFFSETOF(struct pong, buf) + bufsize;
172 if (pData->cbIcmpPending + pongsize > 1024 * 1024)
173 return;
174
175 pong = RTMemAlloc(pongsize);
176 if (RT_UNLIKELY(pong == NULL))
177 return;
178
179 pong->pData = pData;
180 pong->bufsize = bufsize;
181 m_copydata(m, 0, hlen, (caddr_t)&pong->reqiph);
182 m_copydata(m, hlen, sizeof(struct icmp_echo), (caddr_t)&pong->reqicmph);
183 AssertReturnVoid(pong->reqicmph.icmp_type == ICMP_ECHO);
184
185 if (m->m_next == NULL)
186 {
187 /* already in single contiguous buffer */
188 reqdata = mtod(m, char *) + hdrsize;
189 }
190 else
191 {
192 /* use reply buffer as temporary storage */
193 reqdata = pong->buf;
194 m_copydata(m, (int)hdrsize, (int)reqsize, reqdata);
195 }
196
197 dst = ip->ip_dst.s_addr;
198
199 opts.Ttl = ttl;
200 opts.Tos = ip->ip_tos; /* affected by DisableUserTOSSetting key */
201 opts.Flags = (ip->ip_off & IP_DF) != 0 ? IP_FLAG_DF : 0;
202 opts.OptionsSize = 0;
203 opts.OptionsData = 0;
204
205
206 status = IcmpSendEcho2(pData->icmp_socket.sh, NULL,
207 g_pfnIcmpCallback, pong,
208 dst, reqdata, (WORD)reqsize, &opts,
209 pong->buf, (DWORD)pong->bufsize,
210 5 * 1000 /* ms */);
211
212 if (RT_UNLIKELY(status != 0))
213 {
214 Log2(("NAT: IcmpSendEcho2: unexpected status %d\n", status));
215 }
216 else if ((status = GetLastError()) != ERROR_IO_PENDING)
217 {
218 int code;
219
220 Log2(("NAT: IcmpSendEcho2: error %d\n", status));
221 switch (status) {
222 case ERROR_NETWORK_UNREACHABLE:
223 code = ICMP_UNREACH_NET;
224 break;
225 case ERROR_HOST_UNREACHABLE:
226 code = ICMP_UNREACH_HOST;
227 break;
228 default:
229 code = -1;
230 break;
231 }
232
233 if (code != -1) /* send icmp error */
234 {
235 struct mbuf *em = icmpwin_get_error(pong, ICMP_UNREACH, code);
236 if (em != NULL)
237 {
238 struct ip *eip = mtod(em, struct ip *);
239 eip->ip_src = alias_addr;
240 ip_output(pData, NULL, em);
241 }
242 }
243 }
244 else /* success */
245 {
246 Log2(("NAT: pong %p for ping %RTnaipv4 id 0x%04x seq %d len %zu (%zu)\n",
247 pong, dst,
248 RT_N2H_U16(pong->reqicmph.icmp_echo_id),
249 RT_N2H_U16(pong->reqicmph.icmp_echo_seq),
250 pongsize, reqsize));
251
252 pData->cbIcmpPending += pongsize;
253 TAILQ_INSERT_TAIL(&pData->pongs_expected, pong, queue_entry);
254 pong = NULL; /* callback owns it now */
255 }
256
257 if (pong != NULL)
258 RTMemFree(pong);
259}
260
261
262static VOID WINAPI
263icmpwin_callback_apc(void *ctx, PIO_STATUS_BLOCK iob, ULONG reserved)
264{
265 struct pong *pong = (struct pong *)ctx;
266 if (pong != NULL)
267 icmpwin_callback(pong);
268 RT_NOREF2(iob, reserved);
269}
270
271
272static VOID WINAPI
273icmpwin_callback_old(void *ctx)
274{
275 struct pong *pong = (struct pong *)ctx;
276 if (pong != NULL)
277 icmpwin_callback(pong);
278}
279
280
281/*
282 * Actual callback code for IcmpSendEcho2(). OS version specific
283 * trampoline will free "pong" argument for us.
284 *
285 * Since async callback can be called anytime the thread is alertable,
286 * it's not safe to do any processing here. Instead queue it and
287 * notify the main loop.
288 */
289static void
290icmpwin_callback(struct pong *pong)
291{
292 PNATState pData = pong->pData;
293
294 if (pData == NULL)
295 {
296 RTMemFree(pong);
297 return;
298 }
299
300#ifdef DEBUG
301 {
302 struct pong *expected, *already;
303
304 TAILQ_FOREACH(expected, &pData->pongs_expected, queue_entry)
305 {
306 if (expected == pong)
307 break;
308 }
309 Assert(expected);
310
311 TAILQ_FOREACH(already, &pData->pongs_received, queue_entry)
312 {
313 if (already == pong)
314 break;
315 }
316 Assert(!already);
317 }
318#endif
319
320 TAILQ_REMOVE(&pData->pongs_expected, pong, queue_entry);
321 TAILQ_INSERT_TAIL(&pData->pongs_received, pong, queue_entry);
322
323 WSASetEvent(pData->phEvents[VBOX_ICMP_EVENT_INDEX]);
324}
325
326
327void
328icmpwin_process(PNATState pData)
329{
330 struct pong_tailq pongs;
331
332 if (TAILQ_EMPTY(&pData->pongs_received))
333 return;
334
335 TAILQ_INIT(&pongs);
336 TAILQ_CONCAT(&pongs, &pData->pongs_received, queue_entry);
337
338 while (!TAILQ_EMPTY(&pongs)) {
339 struct pong *pong = TAILQ_FIRST(&pongs);
340 size_t sz;
341
342 sz = RT_UOFFSETOF(struct pong, buf) + pong->bufsize;
343 Assert(pData->cbIcmpPending >= sz);
344 pData->cbIcmpPending -= sz;
345
346 icmpwin_pong(pong);
347
348 TAILQ_REMOVE(&pongs, pong, queue_entry);
349 RTMemFree(pong);
350 }
351}
352
353
354void
355icmpwin_pong(struct pong *pong)
356{
357 PNATState pData;
358 DWORD nreplies;
359 ICMP_ECHO_REPLY *reply;
360 struct mbuf *m;
361 struct ip *ip;
362 struct icmp_echo *icmp;
363 size_t reqsize;
364
365 pData = pong->pData; /* to make slirp_state.h macro hackery work */
366
367 nreplies = IcmpParseReplies(pong->buf, (DWORD)pong->bufsize);
368 if (nreplies == 0)
369 {
370 DWORD error = GetLastError();
371 if (error == IP_REQ_TIMED_OUT)
372 Log2(("NAT: ping %p timed out\n", (void *)pong));
373 else
374 Log2(("NAT: ping %p: IcmpParseReplies: error %d\n",
375 (void *)pong, error));
376 return;
377 }
378
379 reply = (ICMP_ECHO_REPLY *)pong->buf;
380
381 if (reply->Status == IP_SUCCESS)
382 {
383 if (reply->Options.OptionsSize != 0) /* don't do options */
384 return;
385
386 /* need to remap &reply->Address ? */
387 if (/* not a mapped loopback */ 1)
388 {
389 if (reply->Options.Ttl <= 1)
390 return;
391 --reply->Options.Ttl;
392 }
393
394 reqsize = reply->DataSize;
395 if ( (reply->Options.Flags & IP_FLAG_DF) != 0
396 && sizeof(struct ip) + sizeof(struct icmp_echo) + reqsize > (size_t)if_mtu)
397 return;
398
399 m = icmpwin_get_mbuf(pData, reqsize);
400 if (m == NULL)
401 return;
402
403 ip = mtod(m, struct ip *);
404 icmp = (struct icmp_echo *)(mtod(m, char *) + sizeof(*ip));
405
406 /* fill in ip (ip_output0() does the boilerplate for us) */
407 ip->ip_tos = reply->Options.Tos;
408 ip->ip_len = sizeof(*ip) + sizeof(*icmp) + (int)reqsize;
409 ip->ip_off = 0;
410 ip->ip_ttl = reply->Options.Ttl;
411 ip->ip_p = IPPROTO_ICMP;
412 ip->ip_src.s_addr = reply->Address;
413 ip->ip_dst = pong->reqiph.ip.ip_src;
414
415 icmp->icmp_type = ICMP_ECHOREPLY;
416 icmp->icmp_code = 0;
417 icmp->icmp_cksum = 0;
418 icmp->icmp_echo_id = pong->reqicmph.icmp_echo_id;
419 icmp->icmp_echo_seq = pong->reqicmph.icmp_echo_seq;
420
421 m_append(pData, m, (int)reqsize, reply->Data);
422
423 icmp->icmp_cksum = in_cksum_skip(m, ip->ip_len, sizeof(*ip));
424 }
425 else {
426 uint8_t type, code;
427
428 switch (reply->Status) {
429 case IP_DEST_NET_UNREACHABLE:
430 type = ICMP_UNREACH; code = ICMP_UNREACH_NET;
431 break;
432 case IP_DEST_HOST_UNREACHABLE:
433 type = ICMP_UNREACH; code = ICMP_UNREACH_HOST;
434 break;
435 case IP_DEST_PROT_UNREACHABLE:
436 type = ICMP_UNREACH; code = ICMP_UNREACH_PROTOCOL;
437 break;
438 case IP_PACKET_TOO_BIG:
439 type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG;
440 break;
441 case IP_SOURCE_QUENCH:
442 type = ICMP_SOURCEQUENCH; code = 0;
443 break;
444 case IP_TTL_EXPIRED_TRANSIT:
445 type = ICMP_TIMXCEED; code = ICMP_TIMXCEED_INTRANS;
446 break;
447 case IP_TTL_EXPIRED_REASSEM:
448 type = ICMP_TIMXCEED; code = ICMP_TIMXCEED_REASS;
449 break;
450 default:
451 Log2(("NAT: ping reply status %d, dropped\n", reply->Status));
452 return;
453 }
454
455 Log2(("NAT: ping status %d -> type %d/code %d\n",
456 reply->Status, type, code));
457
458 /*
459 * XXX: we don't know the TTL of the request at the time this
460 * ICMP error was generated (we can guess it was 1 for ttl
461 * exceeded, but don't bother faking it).
462 */
463 m = icmpwin_get_error(pong, type, code);
464 if (m == NULL)
465 return;
466
467 ip = mtod(m, struct ip *);
468
469 ip->ip_tos = reply->Options.Tos;
470 ip->ip_ttl = reply->Options.Ttl; /* XXX: decrement */
471 ip->ip_src.s_addr = reply->Address;
472 }
473
474 Assert(ip->ip_len == m_length(m, NULL));
475 ip_output(pData, NULL, m);
476}
477
478
479/*
480 * Prepare mbuf with ICMP error type/code.
481 * IP source must be filled by the caller.
482 */
483static struct mbuf *
484icmpwin_get_error(struct pong *pong, int type, int code)
485{
486 PNATState pData = pong->pData;
487 struct mbuf *m;
488 struct ip *ip;
489 struct icmp_echo *icmp;
490 size_t reqsize;
491
492 Log2(("NAT: ping error type %d/code %d\n", type, code));
493
494 size_t reqhlen = pong->reqiph.ip.ip_hl << 2;
495 reqsize = reqhlen + sizeof(pong->reqicmph);
496
497 m = icmpwin_get_mbuf(pData, reqsize);
498 if (m == NULL)
499 return NULL;
500
501 ip = mtod(m, struct ip *);
502 icmp = (struct icmp_echo *)(mtod(m, char *) + sizeof(*ip));
503
504 ip->ip_tos = 0;
505 ip->ip_len = sizeof(*ip) + sizeof(*icmp) + (int)reqsize;
506 ip->ip_off = 0;
507 ip->ip_ttl = IPDEFTTL;
508 ip->ip_p = IPPROTO_ICMP;
509 ip->ip_src.s_addr = 0; /* NB */
510 ip->ip_dst = pong->reqiph.ip.ip_src;
511
512 icmp->icmp_type = type;
513 icmp->icmp_code = code;
514 icmp->icmp_cksum = 0;
515 icmp->icmp_echo_id = 0;
516 icmp->icmp_echo_seq = 0;
517
518 /* payload: the IP and ICMP headers of the original request */
519 m_append(pData, m, (int)reqhlen, (caddr_t)&pong->reqiph);
520 m_append(pData, m, sizeof(pong->reqicmph), (caddr_t)&pong->reqicmph);
521
522 icmp->icmp_cksum = in_cksum_skip(m, ip->ip_len, sizeof(*ip));
523
524 return m;
525}
526
527
528/*
529 * Replacing original simple slirp mbufs with real mbufs from freebsd
530 * was a bit messy since assumption are different. This leads to
531 * rather ugly code at times. Hide the gore here.
532 */
533static struct mbuf *
534icmpwin_get_mbuf(PNATState pData, size_t reqsize)
535{
536 struct mbuf *m;
537
538 reqsize += if_maxlinkhdr;
539 reqsize += sizeof(struct ip) + sizeof(struct icmp_echo);
540
541 if (reqsize <= MHLEN)
542 /* good pings come in small packets */
543 m = m_gethdr(pData, M_NOWAIT, MT_HEADER);
544 else
545 m = m_getjcl(pData, M_NOWAIT, MT_HEADER, M_PKTHDR, (int)slirp_size(pData));
546
547 if (m == NULL)
548 return NULL;
549
550 m->m_flags |= M_SKIP_FIREWALL;
551 m->m_data += if_maxlinkhdr; /* reserve leading space for ethernet header */
552
553 m->m_pkthdr.header = mtod(m, void *);
554 m->m_len = sizeof(struct ip) + sizeof(struct icmp_echo);
555
556 return m;
557}
558
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact · Privacy policy · Terms of Use