VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/linux/SUPLib-linux.cpp

Last change on this file was 108725, checked in by vboxsync, 6 weeks ago

HostDrivers/Support: Make SUPR3 work on hosts where the page size is not known during compile time, bugref:10391 [build fix]

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 12.4 KB
Line 
1/* $Id: SUPLib-linux.cpp 108725 2025-03-24 18:54:47Z vboxsync $ */
2/** @file
3 * VirtualBox Support Library - GNU/Linux specific parts.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#define LOG_GROUP LOG_GROUP_SUP
42#ifdef IN_SUP_HARDENED_R3
43# undef DEBUG /* Warning: disables RT_STRICT */
44# undef RT_STRICT
45# ifndef LOG_DISABLED
46# define LOG_DISABLED
47# endif
48# define RTLOG_REL_DISABLED
49# include <iprt/log.h>
50#endif
51
52#include <sys/fcntl.h>
53#include <sys/ioctl.h>
54#include <sys/mman.h>
55#include <errno.h>
56#include <unistd.h>
57#include <stdlib.h>
58#include <malloc.h>
59
60#include <VBox/log.h>
61#include <VBox/sup.h>
62#include <iprt/path.h>
63#include <iprt/assert.h>
64#include <VBox/types.h>
65#include <iprt/string.h>
66#include <iprt/system.h>
67#include <VBox/err.h>
68#include <VBox/param.h>
69#include "../SUPLibInternal.h"
70#include "../SUPDrvIOC.h"
71
72
73/*********************************************************************************************************************************
74* Defined Constants And Macros *
75*********************************************************************************************************************************/
76/** System device name. */
77#define DEVICE_NAME_SYS "/dev/vboxdrv"
78/** User device name. */
79#define DEVICE_NAME_USR "/dev/vboxdrvu"
80
81/* define MADV_DONTFORK if it's missing from the system headers. */
82#ifndef MADV_DONTFORK
83# define MADV_DONTFORK 10
84#endif
85
86
87
88DECLHIDDEN(int) suplibOsInit(PSUPLIBDATA pThis, bool fPreInited, uint32_t fFlags, SUPINITOP *penmWhat, PRTERRINFO pErrInfo)
89{
90 RT_NOREF2(penmWhat, pErrInfo);
91
92 /*
93 * Nothing to do if pre-inited.
94 */
95 if (fPreInited)
96 return VINF_SUCCESS;
97 Assert(pThis->hDevice == (intptr_t)NIL_RTFILE);
98
99 /*
100 * Check if madvise works.
101 */
102 uint32_t const cbPage = SUP_PAGE_SIZE;
103 void *pv = mmap(NULL, cbPage, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
104 if (pv == MAP_FAILED)
105 return VERR_NO_MEMORY;
106 pThis->fSysMadviseWorks = (0 == madvise(pv, cbPage, MADV_DONTFORK));
107 munmap(pv, cbPage);
108
109 /*
110 * Driverless?
111 */
112 if (fFlags & SUPR3INIT_F_DRIVERLESS)
113 {
114 pThis->fDriverless = true;
115 return VINF_SUCCESS;
116 }
117
118 /*
119 * Try open the device.
120 */
121 const char *pszDeviceNm = fFlags & SUPR3INIT_F_UNRESTRICTED ? DEVICE_NAME_SYS : DEVICE_NAME_USR;
122 int hDevice = open(pszDeviceNm, O_RDWR, 0);
123 if (hDevice < 0)
124 {
125 /*
126 * Try load the device.
127 */
128 hDevice = open(pszDeviceNm, O_RDWR, 0);
129 if (hDevice < 0)
130 {
131 int rc;
132 switch (errno)
133 {
134 case ENXIO: /* see man 2 open, ENODEV is actually a kernel bug */
135 case ENODEV: rc = VERR_VM_DRIVER_LOAD_ERROR; break;
136 case EPERM:
137 case EACCES: rc = VERR_VM_DRIVER_NOT_ACCESSIBLE; break;
138 case ENOENT: rc = VERR_VM_DRIVER_NOT_INSTALLED; break;
139 default: rc = VERR_VM_DRIVER_OPEN_ERROR; break;
140 }
141 if (fFlags & SUPR3INIT_F_DRIVERLESS_MASK)
142 {
143 LogRel(("Failed to open \"%s\", errno=%d, rc=%Rrc - Switching to driverless mode.\n", pszDeviceNm, errno, rc));
144 pThis->fDriverless = true;
145 return VINF_SUCCESS;
146 }
147 LogRel(("Failed to open \"%s\", errno=%d, rc=%Rrc\n", pszDeviceNm, errno, rc));
148 return rc;
149 }
150 }
151
152 /*
153 * Mark the file handle close on exec.
154 */
155 if (fcntl(hDevice, F_SETFD, FD_CLOEXEC) == -1)
156 {
157 close(hDevice);
158#ifdef IN_SUP_HARDENED_R3
159 return VERR_INTERNAL_ERROR;
160#else
161 return RTErrConvertFromErrno(errno);
162#endif
163 }
164
165 /*
166 * We're done.
167 */
168 pThis->hDevice = hDevice;
169 pThis->fUnrestricted = RT_BOOL(fFlags & SUPR3INIT_F_UNRESTRICTED);
170 return VINF_SUCCESS;
171}
172
173
174DECLHIDDEN(int) suplibOsTerm(PSUPLIBDATA pThis)
175{
176 /*
177 * Close the device if it's actually open.
178 */
179 if (pThis->hDevice != (intptr_t)NIL_RTFILE)
180 {
181 if (close(pThis->hDevice))
182 AssertFailed();
183 pThis->hDevice = (intptr_t)NIL_RTFILE;
184 }
185
186 return 0;
187}
188
189
190#ifndef IN_SUP_HARDENED_R3
191
192DECLHIDDEN(int) suplibOsInstall(void)
193{
194 // nothing to do on Linux
195 return VERR_NOT_IMPLEMENTED;
196}
197
198
199DECLHIDDEN(int) suplibOsUninstall(void)
200{
201 // nothing to do on Linux
202 return VERR_NOT_IMPLEMENTED;
203}
204
205
206DECLHIDDEN(int) suplibOsIOCtl(PSUPLIBDATA pThis, uintptr_t uFunction, void *pvReq, size_t cbReq)
207{
208 AssertMsg(pThis->hDevice != (intptr_t)NIL_RTFILE, ("SUPLIB not initiated successfully!\n"));
209 NOREF(cbReq);
210
211 /*
212 * Issue device iocontrol.
213 */
214 if (RT_LIKELY(ioctl(pThis->hDevice, uFunction, pvReq) >= 0))
215 return VINF_SUCCESS;
216
217 /* This is the reverse operation of the one found in SUPDrv-linux.c */
218 switch (errno)
219 {
220 case EACCES: return VERR_GENERAL_FAILURE;
221 case EINVAL: return VERR_INVALID_PARAMETER;
222 case EILSEQ: return VERR_INVALID_MAGIC;
223 case ENXIO: return VERR_INVALID_HANDLE;
224 case EFAULT: return VERR_INVALID_POINTER;
225 case ENOLCK: return VERR_LOCK_FAILED;
226 case EEXIST: return VERR_ALREADY_LOADED;
227 case EPERM: return VERR_PERMISSION_DENIED;
228 case ENOSYS: return VERR_VERSION_MISMATCH;
229 case 1000: return VERR_IDT_FAILED;
230 }
231
232 return RTErrConvertFromErrno(errno);
233}
234
235
236DECLHIDDEN(int) suplibOsIOCtlFast(PSUPLIBDATA pThis, uintptr_t uFunction, uintptr_t idCpu)
237{
238 int rc = ioctl(pThis->hDevice, uFunction, idCpu);
239 if (rc == -1)
240 rc = -errno;
241 return rc;
242}
243
244
245DECLHIDDEN(int) suplibOsPageAlloc(PSUPLIBDATA pThis, size_t cPages, uint32_t fFlags, void **ppvPages)
246{
247 /*
248 * If large pages are requested, try use the MAP_HUGETBL flags. This takes
249 * pages from the reserved huge page pool (see sysctl vm.nr_hugepages) and
250 * is typically not configured. Also, when the pool is exhausted we get
251 * ENOMEM back at us. So, when it fails try again w/o MAP_HUGETLB.
252 */
253 int fMmap = MAP_PRIVATE | MAP_ANONYMOUS;
254#ifdef MAP_HUGETLB
255 if ((fFlags & SUP_PAGE_ALLOC_F_LARGE_PAGES) && !(cPages & 511))
256 fMmap |= MAP_HUGETLB;
257#endif
258
259 uint32_t const cbPage = SUP_PAGE_SIZE;
260 uint32_t const cPageShift = SUP_PAGE_SHIFT;
261
262 size_t cbMmap = cPages << cPageShift;
263 if ( !pThis->fSysMadviseWorks
264 && (fFlags & (SUP_PAGE_ALLOC_F_FOR_LOCKING | SUP_PAGE_ALLOC_F_LARGE_PAGES)) == SUP_PAGE_ALLOC_F_FOR_LOCKING)
265 cbMmap += cbPage * 2;
266
267 uint8_t *pbPages = (uint8_t *)mmap(NULL, cbMmap, PROT_READ | PROT_WRITE, fMmap, -1, 0);
268#ifdef MAP_HUGETLB
269 if (pbPages == MAP_FAILED && (fMmap & MAP_HUGETLB))
270 {
271 /* Try again without MAP_HUGETLB if mmap fails: */
272 fMmap &= ~MAP_HUGETLB;
273 if (!pThis->fSysMadviseWorks && (fFlags & SUP_PAGE_ALLOC_F_FOR_LOCKING))
274 cbMmap = (cPages + 2) << cPageShift;
275 pbPages = (uint8_t *)mmap(NULL, cbMmap, PROT_READ | PROT_WRITE, fMmap, -1, 0);
276 }
277#endif
278 if (pbPages != MAP_FAILED)
279 {
280 if ( !(fFlags & SUP_PAGE_ALLOC_F_FOR_LOCKING)
281 || pThis->fSysMadviseWorks
282#ifdef MAP_HUGETLB
283 || (fMmap & MAP_HUGETLB)
284#endif
285 )
286 {
287 /*
288 * It is not fatal if we fail here but a forked child (e.g. the ALSA sound server)
289 * could crash. Linux < 2.6.16 does not implement madvise(MADV_DONTFORK) but the
290 * kernel seems to split bigger VMAs and that is all that we want -- later we set the
291 * VM_DONTCOPY attribute in supdrvOSLockMemOne().
292 */
293 if ( madvise(pbPages, cbMmap, MADV_DONTFORK)
294#ifdef MAP_HUGETLB
295 && !(fMmap & MAP_HUGETLB)
296#endif
297 )
298 LogRel(("SUPLib: madvise %p-%p failed\n", pbPages, cbMmap));
299
300#ifdef MADV_HUGEPAGE
301 /*
302 * Try enable transparent huge pages for the allocation if desired
303 * and we weren't able to use MAP_HUGETBL above.
304 * Note! KVM doesn't seem to benefit much from this.
305 */
306 if ( !(fMmap & MAP_HUGETLB)
307 && (fFlags & SUP_PAGE_ALLOC_F_LARGE_PAGES)
308 && !(cPages & 511)) /** @todo PORTME: x86 assumption */
309 madvise(pbPages, cbMmap, MADV_HUGEPAGE);
310#endif
311 }
312 else
313 {
314 /*
315 * madvise(MADV_DONTFORK) is not available (most probably Linux 2.4). Enclose any
316 * mmapped region by two unmapped pages to guarantee that there is exactly one VM
317 * area struct of the very same size as the mmap area.
318 */
319 mprotect(pbPages, cbPage, PROT_NONE);
320 mprotect(pbPages + cbMmap - cbPage, cbPage, PROT_NONE);
321 pbPages += cPageShift;
322 }
323
324 /** @todo Dunno why we do this, really. It's a waste of time. Maybe it was
325 * to try make sure the pages were allocated or something before we locked them,
326 * so I qualified it with SUP_PAGE_ALLOC_F_FOR_LOCKING (unused) for now... */
327 if (fFlags & SUP_PAGE_ALLOC_F_FOR_LOCKING)
328 memset(pbPages, 0, cPages << cPageShift);
329
330 *ppvPages = pbPages;
331 return VINF_SUCCESS;
332 }
333 return VERR_NO_MEMORY;
334}
335
336
337DECLHIDDEN(int) suplibOsPageFree(PSUPLIBDATA pThis, void *pvPages, size_t cPages)
338{
339 NOREF(pThis);
340 munmap(pvPages, cPages << SUP_PAGE_SHIFT);
341 return VINF_SUCCESS;
342}
343
344
345/**
346 * Check if the host kernel supports VT-x or not.
347 *
348 * Older Linux kernels clear the VMXE bit in the CR4 register (function
349 * tlb_flush_all()) leading to a host kernel panic.
350 *
351 * @returns VBox status code (no info).
352 * @param ppszWhy Where to return explanatory message.
353 */
354DECLHIDDEN(int) suplibOsQueryVTxSupported(const char **ppszWhy)
355{
356 char szBuf[256];
357 int rc = RTSystemQueryOSInfo(RTSYSOSINFO_RELEASE, szBuf, sizeof(szBuf));
358 if (RT_SUCCESS(rc))
359 {
360 char *pszNext;
361 uint32_t uA, uB, uC;
362
363 rc = RTStrToUInt32Ex(szBuf, &pszNext, 10, &uA);
364 if ( RT_SUCCESS(rc)
365 && *pszNext == '.')
366 {
367 /*
368 * new version number scheme starting with Linux 3.0
369 */
370 if (uA >= 3)
371 return VINF_SUCCESS;
372 rc = RTStrToUInt32Ex(pszNext+1, &pszNext, 10, &uB);
373 if ( RT_SUCCESS(rc)
374 && *pszNext == '.')
375 {
376 rc = RTStrToUInt32Ex(pszNext+1, &pszNext, 10, &uC);
377 if (RT_SUCCESS(rc))
378 {
379 uint32_t uLinuxVersion = (uA << 16) + (uB << 8) + uC;
380 if (uLinuxVersion >= (2 << 16) + (6 << 8) + 13)
381 return VINF_SUCCESS;
382 }
383 }
384 }
385 }
386
387 *ppszWhy = "Linux 2.6.13 or newer required!";
388 return VERR_SUPDRV_KERNEL_TOO_OLD_FOR_VTX;
389}
390
391#endif /* !IN_SUP_HARDENED_R3 */
392
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette