1 | /*
|
---|
2 | * CDDL HEADER START
|
---|
3 | *
|
---|
4 | * The contents of this file are subject to the terms of the
|
---|
5 | * Common Development and Distribution License (the "License").
|
---|
6 | * You may not use this file except in compliance with the License.
|
---|
7 | *
|
---|
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
---|
9 | * or http://www.opensolaris.org/os/licensing.
|
---|
10 | * See the License for the specific language governing permissions
|
---|
11 | * and limitations under the License.
|
---|
12 | *
|
---|
13 | * When distributing Covered Code, include this CDDL HEADER in each
|
---|
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
---|
15 | * If applicable, add the following below this CDDL HEADER, with the
|
---|
16 | * fields enclosed by brackets "[]" replaced with your own identifying
|
---|
17 | * information: Portions Copyright [yyyy] [name of copyright owner]
|
---|
18 | *
|
---|
19 | * CDDL HEADER END
|
---|
20 | */
|
---|
21 |
|
---|
22 | /*
|
---|
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
|
---|
24 | */
|
---|
25 |
|
---|
26 | /*
|
---|
27 | * DTrace - Dynamic Tracing for Solaris
|
---|
28 | *
|
---|
29 | * This is the implementation of the Solaris Dynamic Tracing framework
|
---|
30 | * (DTrace). The user-visible interface to DTrace is described at length in
|
---|
31 | * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
|
---|
32 | * library, the in-kernel DTrace framework, and the DTrace providers are
|
---|
33 | * described in the block comments in the <sys/dtrace.h> header file. The
|
---|
34 | * internal architecture of DTrace is described in the block comments in the
|
---|
35 | * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
|
---|
36 | * implementation very much assume mastery of all of these sources; if one has
|
---|
37 | * an unanswered question about the implementation, one should consult them
|
---|
38 | * first.
|
---|
39 | *
|
---|
40 | * The functions here are ordered roughly as follows:
|
---|
41 | *
|
---|
42 | * - Probe context functions
|
---|
43 | * - Probe hashing functions
|
---|
44 | * - Non-probe context utility functions
|
---|
45 | * - Matching functions
|
---|
46 | * - Provider-to-Framework API functions
|
---|
47 | * - Probe management functions
|
---|
48 | * - DIF object functions
|
---|
49 | * - Format functions
|
---|
50 | * - Predicate functions
|
---|
51 | * - ECB functions
|
---|
52 | * - Buffer functions
|
---|
53 | * - Enabling functions
|
---|
54 | * - DOF functions
|
---|
55 | * - Anonymous enabling functions
|
---|
56 | * - Consumer state functions
|
---|
57 | * - Helper functions
|
---|
58 | * - Hook functions
|
---|
59 | * - Driver cookbook functions
|
---|
60 | *
|
---|
61 | * Each group of functions begins with a block comment labelled the "DTrace
|
---|
62 | * [Group] Functions", allowing one to find each block by searching forward
|
---|
63 | * on capital-f functions.
|
---|
64 | */
|
---|
65 | #ifndef VBOX
|
---|
66 | #include <sys/errno.h>
|
---|
67 | #include <sys/stat.h>
|
---|
68 | #include <sys/modctl.h>
|
---|
69 | #include <sys/conf.h>
|
---|
70 | #include <sys/systm.h>
|
---|
71 | #include <sys/ddi.h>
|
---|
72 | #include <sys/sunddi.h>
|
---|
73 | #include <sys/cpuvar.h>
|
---|
74 | #include <sys/kmem.h>
|
---|
75 | #include <sys/strsubr.h>
|
---|
76 | #include <sys/sysmacros.h>
|
---|
77 | #include <sys/dtrace_impl.h>
|
---|
78 | #include <sys/atomic.h>
|
---|
79 | #include <sys/cmn_err.h>
|
---|
80 | #include <sys/mutex_impl.h>
|
---|
81 | #include <sys/rwlock_impl.h>
|
---|
82 | #include <sys/ctf_api.h>
|
---|
83 | #include <sys/panic.h>
|
---|
84 | #include <sys/priv_impl.h>
|
---|
85 | #include <sys/policy.h>
|
---|
86 | #include <sys/cred_impl.h>
|
---|
87 | #include <sys/procfs_isa.h>
|
---|
88 | #include <sys/taskq.h>
|
---|
89 | #include <sys/mkdev.h>
|
---|
90 | #include <sys/kdi.h>
|
---|
91 | #include <sys/zone.h>
|
---|
92 | #include <sys/socket.h>
|
---|
93 | #include <netinet/in.h>
|
---|
94 |
|
---|
95 | #else /* VBOX */
|
---|
96 | # include <sys/dtrace_impl.h>
|
---|
97 | # include <VBox/sup.h>
|
---|
98 | # include <iprt/assert.h>
|
---|
99 | # include <iprt/cpuset.h>
|
---|
100 | # include <iprt/err.h>
|
---|
101 | # include <iprt/mem.h>
|
---|
102 | # include <iprt/mp.h>
|
---|
103 | # include <iprt/string.h>
|
---|
104 | # include <iprt/process.h>
|
---|
105 | # include <iprt/thread.h>
|
---|
106 | # include <iprt/timer.h>
|
---|
107 | # include <limits.h>
|
---|
108 |
|
---|
109 | # undef offsetof
|
---|
110 | # define offsetof RT_OFFSETOF
|
---|
111 |
|
---|
112 | /*
|
---|
113 | * Use asm.h to implemente some of the simple stuff in dtrace_asm.s.
|
---|
114 | */
|
---|
115 | # include <iprt/asm.h>
|
---|
116 | # if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
|
---|
117 | # include <iprt/asm-amd64-x86.h>
|
---|
118 | # elif defined(RT_ARCH_ARM64)
|
---|
119 | # include <iprt/asm-arm.h>
|
---|
120 | # endif
|
---|
121 | # define dtrace_casptr(a_ppvDst, a_pvOld, a_pvNew) \
|
---|
122 | VBoxDtCompareAndSwapPtr((void * volatile *)a_ppvDst, a_pvOld, a_pvNew)
|
---|
/*
 * Implements dtrace_casptr() for the VBOX build: atomic compare-and-swap
 * of a pointer-sized value.
 *
 * @returns The value *ppvDst held before the operation (per the IPRT
 *          ASMAtomicCmpXchgEx* contract, the old value is stored via the
 *          last parameter).  The swap took effect iff the return value
 *          equals pvOld.
 * @param   ppvDst  The atomic target variable.
 * @param   pvOld   The expected current value.
 * @param   pvNew   The value to store if *ppvDst == pvOld.
 */
DECLINLINE(void *) VBoxDtCompareAndSwapPtr(void * volatile *ppvDst, void *pvOld, void *pvNew)
{
    void *pvRet;
    ASMAtomicCmpXchgExPtrVoid(ppvDst, pvNew, pvOld, &pvRet);
    return pvRet;
}
|
---|
129 |
|
---|
130 | # define dtrace_cas32(a_pu32Dst, a_pu32Old, a_pu32New) \
|
---|
131 | VBoxDtCompareAndSwapU32(a_pu32Dst, a_pu32Old, a_pu32New)
|
---|
/*
 * Implements dtrace_cas32() for the VBOX build: atomic compare-and-swap
 * of a 32-bit value.
 *
 * @returns The value *pu32Dst held before the operation (the IPRT
 *          ASMAtomicCmpXchgEx* variants store the old value via the last
 *          parameter).  The swap took effect iff the return value equals
 *          u32Old.
 * @param   pu32Dst The atomic target variable.
 * @param   u32Old  The expected current value.
 * @param   u32New  The value to store if *pu32Dst == u32Old.
 */
DECLINLINE(uint32_t) VBoxDtCompareAndSwapU32(uint32_t volatile *pu32Dst, uint32_t u32Old, uint32_t u32New)
{
    uint32_t u32Ret;
    ASMAtomicCmpXchgExU32(pu32Dst, u32New, u32Old, &u32Ret);
    return u32Ret;
}
|
---|
138 |
|
---|
139 | # define dtrace_membar_consumer() ASMReadFence()
|
---|
140 | # define dtrace_membar_producer() ASMWriteFence()
|
---|
141 | # define dtrace_interrupt_disable() ASMIntDisableFlags()
|
---|
142 | # define dtrace_interrupt_enable(a_EFL) ASMSetFlags(a_EFL)
|
---|
143 |
|
---|
144 | /*
|
---|
145 | * NULL must be set to 0 or we'll end up with a billion warnings(=errors).
|
---|
146 | */
|
---|
147 | # undef NULL
|
---|
148 | # define NULL (0)
|
---|
149 | #endif /* VBOX */
|
---|
150 |
|
---|
151 | /** Check if the given address is a valid kernel address.
|
---|
152 | * The value can be uintptr_t or uint64_t. */
|
---|
153 | #ifndef VBOX
|
---|
154 | # define VBDT_IS_VALID_KRNL_ADDR(a_uAddr) ((a_uAddr) >= KERNELBASE)
|
---|
155 | #else
|
---|
156 | # define VBDT_IS_VALID_KRNL_ADDR(a_uAddr) \
|
---|
157 | ( (sizeof(a_uAddr) == sizeof(uintptr_t) || (uintptr_t)(a_uAddr) == (a_uAddr)) \
|
---|
158 | && RTR0MemKernelIsValidAddr((void *)(uintptr_t)(a_uAddr)) )
|
---|
159 | #endif
|
---|
160 |
|
---|
161 |
|
---|
162 | /*
|
---|
163 | * DTrace Tunable Variables
|
---|
164 | *
|
---|
165 | * The following variables may be tuned by adding a line to /etc/system that
|
---|
166 | * includes both the name of the DTrace module ("dtrace") and the name of the
|
---|
167 | * variable. For example:
|
---|
168 | *
|
---|
169 | * set dtrace:dtrace_destructive_disallow = 1
|
---|
170 | *
|
---|
171 | * In general, the only variables that one should be tuning this way are those
|
---|
172 | * that affect system-wide DTrace behavior, and for which the default behavior
|
---|
173 | * is undesirable. Most of these variables are tunable on a per-consumer
|
---|
174 | * basis using DTrace options, and need not be tuned on a system-wide basis.
|
---|
175 | * When tuning these variables, avoid pathological values; while some attempt
|
---|
176 | * is made to verify the integrity of these variables, they are not considered
|
---|
177 | * part of the supported interface to DTrace, and they are therefore not
|
---|
178 | * checked comprehensively. Further, these variables should not be tuned
|
---|
179 | * dynamically via "mdb -kw" or other means; they should only be tuned via
|
---|
180 | * /etc/system.
|
---|
181 | */
|
---|
182 | int dtrace_destructive_disallow = 0;
|
---|
183 | dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
|
---|
184 | size_t dtrace_difo_maxsize = (256 * 1024);
|
---|
185 | dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
|
---|
186 | size_t dtrace_global_maxsize = (16 * 1024);
|
---|
187 | size_t dtrace_actions_max = (16 * 1024);
|
---|
188 | size_t dtrace_retain_max = 1024;
|
---|
189 | dtrace_optval_t dtrace_helper_actions_max = 32;
|
---|
190 | dtrace_optval_t dtrace_helper_providers_max = 32;
|
---|
191 | dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
|
---|
192 | size_t dtrace_strsize_default = 256;
|
---|
193 | dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
|
---|
194 | dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
|
---|
195 | dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
|
---|
196 | dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
|
---|
197 | dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
|
---|
198 | dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
|
---|
199 | dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
|
---|
200 | dtrace_optval_t dtrace_nspec_default = 1;
|
---|
201 | dtrace_optval_t dtrace_specsize_default = 32 * 1024;
|
---|
202 | dtrace_optval_t dtrace_stackframes_default = 20;
|
---|
203 | dtrace_optval_t dtrace_ustackframes_default = 20;
|
---|
204 | dtrace_optval_t dtrace_jstackframes_default = 50;
|
---|
205 | dtrace_optval_t dtrace_jstackstrsize_default = 512;
|
---|
206 | int dtrace_msgdsize_max = 128;
|
---|
207 | hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */
|
---|
208 | hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
|
---|
209 | int dtrace_devdepth_max = 32;
|
---|
210 | int dtrace_err_verbose;
|
---|
211 | hrtime_t dtrace_deadman_interval = NANOSEC;
|
---|
212 | hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
|
---|
213 | hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
|
---|
214 |
|
---|
215 | /*
|
---|
216 | * DTrace External Variables
|
---|
217 | *
|
---|
218 | * As dtrace(7D) is a kernel module, any DTrace variables are obviously
|
---|
219 | * available to DTrace consumers via the backtick (`) syntax. One of these,
|
---|
220 | * dtrace_zero, is made deliberately so: it is provided as a source of
|
---|
221 | * well-known, zero-filled memory. While this variable is not documented,
|
---|
222 | * it is used by some translators as an implementation detail.
|
---|
223 | */
|
---|
224 | const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
|
---|
225 |
|
---|
226 | /*
|
---|
227 | * DTrace Internal Variables
|
---|
228 | */
|
---|
229 | #ifndef VBOX
|
---|
230 | static dev_info_t *dtrace_devi; /* device info */
|
---|
231 | #endif
|
---|
232 | static vmem_t *dtrace_arena; /* probe ID arena */
|
---|
233 | #ifndef VBOX
|
---|
234 | static vmem_t *dtrace_minor; /* minor number arena */
|
---|
235 | static taskq_t *dtrace_taskq; /* task queue */
|
---|
236 | #endif
|
---|
237 | static dtrace_probe_t **dtrace_probes; /* array of all probes */
|
---|
238 | static VBDTTYPE(uint32_t,int) dtrace_nprobes; /* number of probes */
|
---|
239 | static dtrace_provider_t *dtrace_provider; /* provider list */
|
---|
240 | static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
|
---|
241 | static int dtrace_opens; /* number of opens */
|
---|
242 | static int dtrace_helpers; /* number of helpers */
|
---|
243 | #ifndef VBOX
|
---|
244 | static void *dtrace_softstate; /* softstate pointer */
|
---|
245 | #endif
|
---|
246 | static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
|
---|
247 | static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
|
---|
248 | static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
|
---|
249 | static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
|
---|
250 | static int dtrace_toxranges; /* number of toxic ranges */
|
---|
251 | static int dtrace_toxranges_max; /* size of toxic range array */
|
---|
252 | static dtrace_anon_t dtrace_anon; /* anonymous enabling */
|
---|
253 | static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
|
---|
254 | static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
|
---|
255 | #ifndef VBOX
|
---|
256 | static kthread_t *dtrace_panicked; /* panicking thread */
|
---|
257 | #endif
|
---|
258 | static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
|
---|
259 | static dtrace_genid_t dtrace_probegen; /* current probe generation */
|
---|
260 | static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
|
---|
261 | static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
|
---|
262 | static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
|
---|
263 | static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
|
---|
264 | static int dtrace_dynvar_failclean; /* dynvars failed to clean */
|
---|
265 |
|
---|
266 | /*
|
---|
267 | * DTrace Locking
|
---|
268 | * DTrace is protected by three (relatively coarse-grained) locks:
|
---|
269 | *
|
---|
270 | * (1) dtrace_lock is required to manipulate essentially any DTrace state,
|
---|
271 | * including enabling state, probes, ECBs, consumer state, helper state,
|
---|
272 | * etc. Importantly, dtrace_lock is _not_ required when in probe context;
|
---|
273 | * probe context is lock-free -- synchronization is handled via the
|
---|
274 | * dtrace_sync() cross call mechanism.
|
---|
275 | *
|
---|
276 | * (2) dtrace_provider_lock is required when manipulating provider state, or
|
---|
277 | * when provider state must be held constant.
|
---|
278 | *
|
---|
279 | * (3) dtrace_meta_lock is required when manipulating meta provider state, or
|
---|
280 | * when meta provider state must be held constant.
|
---|
281 | *
|
---|
282 | * The lock ordering between these three locks is dtrace_meta_lock before
|
---|
283 | * dtrace_provider_lock before dtrace_lock. (In particular, there are
|
---|
284 | * several places where dtrace_provider_lock is held by the framework as it
|
---|
285 | * calls into the providers -- which then call back into the framework,
|
---|
286 | * grabbing dtrace_lock.)
|
---|
287 | *
|
---|
288 | * There are two other locks in the mix: mod_lock and cpu_lock. With respect
|
---|
289 | * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
|
---|
290 | * role as a coarse-grained lock; it is acquired before both of these locks.
|
---|
291 | * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
|
---|
292 | * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
|
---|
293 | * mod_lock is similar with respect to dtrace_provider_lock in that it must be
|
---|
294 | * acquired _between_ dtrace_provider_lock and dtrace_lock.
|
---|
295 | */
|
---|
296 | static kmutex_t dtrace_lock; /* probe state lock */
|
---|
297 | static kmutex_t dtrace_provider_lock; /* provider state lock */
|
---|
298 | static kmutex_t dtrace_meta_lock; /* meta-provider state lock */
|
---|
299 |
|
---|
300 | /*
|
---|
301 | * DTrace Provider Variables
|
---|
302 | *
|
---|
303 | * These are the variables relating to DTrace as a provider (that is, the
|
---|
304 | * provider of the BEGIN, END, and ERROR probes).
|
---|
305 | */
|
---|
/*
 * Stability attributes for the probes published by the "dtrace" provider
 * itself (BEGIN, END, ERROR).  Each row is a dtrace_attribute_t triple:
 * { name stability, data stability, dependency class } -- row order
 * follows dtrace_pattr_t (provider, module, function, name, args).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },	/* provider */
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },	/* module */
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },	/* function */
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },	/* name */
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },	/* args */
};
|
---|
313 |
|
---|
/*
 * Do-nothing stub used to fill the dtrace_provider_ops slots for which
 * the built-in "dtrace" provider needs to take no action.
 */
static void
dtrace_nullop(void)
{
	/* Intentionally empty. */
}
|
---|
317 |
|
---|
/*
 * Stub enable operation for the built-in "dtrace" provider: performs no
 * work and always reports success.
 *
 * @returns 0 (success) unconditionally.
 */
static int
dtrace_enable_nullop(void)
{
	return 0;
}
|
---|
323 |
|
---|
/*
 * Provider operations for the built-in "dtrace" provider.  All callbacks
 * are no-op stubs; the slot order follows dtrace_pops_t as declared in
 * <sys/dtrace.h> (see the trailing comments -- confirm against the header
 * if fields are ever added or reordered).
 */
static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,	/* dtps_provide */
	(void (*)(void *, struct modctl *))dtrace_nullop,	/* dtps_provide_module */
	(int (*)(void *, dtrace_id_t, void *))(uintptr_t)dtrace_enable_nullop,	/* dtps_enable */
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,	/* dtps_disable */
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,	/* dtps_suspend */
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,	/* dtps_resume */
	NULL,	/* dtps_getargdesc */
	NULL,	/* dtps_getargval */
	NULL,	/* dtps_usermode */
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop	/* dtps_destroy */
};
|
---|
336 |
|
---|
337 | static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
|
---|
338 | static dtrace_id_t dtrace_probeid_end; /* special END probe */
|
---|
339 | dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
|
---|
340 |
|
---|
341 | /*
|
---|
342 | * DTrace Helper Tracing Variables
|
---|
343 | */
|
---|
344 | uint32_t dtrace_helptrace_next = 0;
|
---|
345 | uint32_t dtrace_helptrace_nlocals;
|
---|
346 | char *dtrace_helptrace_buffer;
|
---|
347 | int dtrace_helptrace_bufsize = 512 * 1024;
|
---|
348 |
|
---|
349 | #ifdef DEBUG
|
---|
350 | int dtrace_helptrace_enabled = 1;
|
---|
351 | #else
|
---|
352 | int dtrace_helptrace_enabled = 0;
|
---|
353 | #endif
|
---|
354 |
|
---|
355 | /*
|
---|
356 | * DTrace Error Hashing
|
---|
357 | *
|
---|
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
|
---|
359 | * table. This is very useful for checking coverage of tests that are
|
---|
360 | * expected to induce DIF or DOF processing errors, and may be useful for
|
---|
 * debugging problems in the DIF code generator or in DOF generation.  The
|
---|
362 | * error hash may be examined with the ::dtrace_errhash MDB dcmd.
|
---|
363 | */
|
---|
364 | #ifdef DEBUG
|
---|
365 | static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
|
---|
366 | static const char *dtrace_errlast;
|
---|
367 | static kthread_t *dtrace_errthread;
|
---|
368 | static kmutex_t dtrace_errlock;
|
---|
369 | #endif
|
---|
370 |
|
---|
371 | /*
|
---|
372 | * DTrace Macros and Constants
|
---|
373 | *
|
---|
374 | * These are various macros that are useful in various spots in the
|
---|
375 | * implementation, along with a few random constants that have no meaning
|
---|
376 | * outside of the implementation. There is no real structure to this cpp
|
---|
377 | * mishmash -- but is there ever?
|
---|
378 | */
|
---|
379 | #define DTRACE_HASHSTR(hash, probe) \
|
---|
380 | dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))
|
---|
381 |
|
---|
382 | #define DTRACE_HASHNEXT(hash, probe) \
|
---|
383 | (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)
|
---|
384 |
|
---|
385 | #define DTRACE_HASHPREV(hash, probe) \
|
---|
386 | (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)
|
---|
387 |
|
---|
388 | #define DTRACE_HASHEQ(hash, lhs, rhs) \
|
---|
389 | (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
|
---|
390 | *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
|
---|
391 |
|
---|
392 | #define DTRACE_AGGHASHSIZE_SLEW 17
|
---|
393 |
|
---|
394 | #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
|
---|
395 |
|
---|
396 | /*
|
---|
397 | * The key for a thread-local variable consists of the lower 61 bits of the
|
---|
398 | * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
|
---|
399 | * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
|
---|
400 | * equal to a variable identifier. This is necessary (but not sufficient) to
|
---|
401 | * assure that global associative arrays never collide with thread-local
|
---|
402 | * variables. To guarantee that they cannot collide, we must also define the
|
---|
403 | * order for keying dynamic variables. That order is:
|
---|
404 | *
|
---|
405 | * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
|
---|
406 | *
|
---|
407 | * Because the variable-key and the tls-key are in orthogonal spaces, there is
|
---|
408 | * no way for a global variable key signature to match a thread-local key
|
---|
409 | * signature.
|
---|
410 | */
|
---|
411 | #ifndef VBOX
|
---|
412 | #define DTRACE_TLS_THRKEY(where) { \
|
---|
413 | uint_t intr = 0; \
|
---|
414 | uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
|
---|
415 | for (; actv; actv >>= 1) \
|
---|
416 | intr++; \
|
---|
417 | ASSERT(intr < (1 << 3)); \
|
---|
418 | (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
|
---|
419 | (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
|
---|
420 | }
|
---|
421 | #else
|
---|
422 | #define DTRACE_TLS_THRKEY(where) do { \
|
---|
423 | (where) = (((uintptr_t)RTThreadNativeSelf() + DIF_VARIABLE_MAX) & (RT_BIT_64(61) - 1)) \
|
---|
424 | | (RTThreadIsInInterrupt(NIL_RTTHREAD) ? RT_BIT_64(61) : 0); \
|
---|
425 | } while (0)
|
---|
426 | #endif
|
---|
427 |
|
---|
428 | #define DT_BSWAP_8(x) ((x) & 0xff)
|
---|
429 | #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
|
---|
430 | #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
|
---|
431 | #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
|
---|
432 |
|
---|
433 | #define DT_MASK_LO 0x00000000FFFFFFFFULL
|
---|
434 |
|
---|
435 | #define DTRACE_STORE(type, tomax, offset, what) \
|
---|
436 | *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
|
---|
437 |
|
---|
438 | #ifndef __i386
|
---|
439 | #define DTRACE_ALIGNCHECK(addr, size, flags) \
|
---|
440 | if (addr & (size - 1)) { \
|
---|
441 | *flags |= CPU_DTRACE_BADALIGN; \
|
---|
442 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = addr; \
|
---|
443 | return (0); \
|
---|
444 | }
|
---|
445 | #else
|
---|
446 | #define DTRACE_ALIGNCHECK(addr, size, flags)
|
---|
447 | #endif
|
---|
448 |
|
---|
449 | /*
|
---|
450 | * Test whether a range of memory starting at testaddr of size testsz falls
|
---|
451 | * within the range of memory described by addr, sz. We take care to avoid
|
---|
452 | * problems with overflow and underflow of the unsigned quantities, and
|
---|
453 | * disallow all negative sizes. Ranges of size 0 are allowed.
|
---|
454 | */
|
---|
455 | #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
|
---|
456 | ((testaddr) - (baseaddr) < (basesz) && \
|
---|
457 | (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
|
---|
458 | (testaddr) + (testsz) >= (testaddr))
|
---|
459 |
|
---|
460 | /*
|
---|
461 | * Test whether alloc_sz bytes will fit in the scratch region. We isolate
|
---|
462 | * alloc_sz on the righthand side of the comparison in order to avoid overflow
|
---|
463 | * or underflow in the comparison with it. This is simpler than the INRANGE
|
---|
464 | * check above, because we know that the dtms_scratch_ptr is valid in the
|
---|
465 | * range. Allocations of size zero are allowed.
|
---|
466 | */
|
---|
467 | #define DTRACE_INSCRATCH(mstate, alloc_sz) \
|
---|
468 | ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
|
---|
469 | (mstate)->dtms_scratch_ptr >= (alloc_sz))
|
---|
470 |
|
---|
471 | #ifndef VBOX
|
---|
472 | #define DTRACE_LOADFUNC(bits) \
|
---|
473 | /*CSTYLED*/ \
|
---|
474 | VBDTSTATIC uint##bits##_t \
|
---|
475 | dtrace_load##bits(uintptr_t addr) \
|
---|
476 | { \
|
---|
477 | size_t size = bits / NBBY; \
|
---|
478 | /*CSTYLED*/ \
|
---|
479 | uint##bits##_t rval; \
|
---|
480 | int i; \
|
---|
481 | processorid_t me = VBDT_GET_CPUID(); \
|
---|
482 | volatile uint16_t *flags = (volatile uint16_t *) \
|
---|
483 | &cpu_core[me].cpuc_dtrace_flags; \
|
---|
484 | \
|
---|
485 | DTRACE_ALIGNCHECK(addr, size, flags); \
|
---|
486 | \
|
---|
487 | for (i = 0; i < dtrace_toxranges; i++) { \
|
---|
488 | if (addr >= dtrace_toxrange[i].dtt_limit) \
|
---|
489 | continue; \
|
---|
490 | \
|
---|
491 | if (addr + size <= dtrace_toxrange[i].dtt_base) \
|
---|
492 | continue; \
|
---|
493 | \
|
---|
494 | /* \
|
---|
495 | * This address falls within a toxic region; return 0. \
|
---|
496 | */ \
|
---|
497 | *flags |= CPU_DTRACE_BADADDR; \
|
---|
498 | cpu_core[me].cpuc_dtrace_illval = addr; \
|
---|
499 | return (0); \
|
---|
500 | } \
|
---|
501 | \
|
---|
502 | *flags |= CPU_DTRACE_NOFAULT; \
|
---|
503 | /*CSTYLED*/ \
|
---|
504 | rval = *((volatile uint##bits##_t *)addr); \
|
---|
505 | *flags &= ~CPU_DTRACE_NOFAULT; \
|
---|
506 | \
|
---|
507 | return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
|
---|
508 | }
|
---|
509 | #else /* VBOX */
|
---|
510 | # define DTRACE_LOADFUNC(bits) \
|
---|
511 | VBDTSTATIC uint##bits##_t \
|
---|
512 | dtrace_load##bits(uintptr_t addr) \
|
---|
513 | { \
|
---|
514 | size_t const size = bits / NBBY; \
|
---|
515 | uint##bits##_t rval; \
|
---|
516 | processorid_t me; \
|
---|
517 | int i, rc; \
|
---|
518 | \
|
---|
519 | /*DTRACE_ALIGNCHECK(addr, size, flags);*/ \
|
---|
520 | \
|
---|
521 | for (i = 0; i < dtrace_toxranges; i++) { \
|
---|
522 | if (addr >= dtrace_toxrange[i].dtt_limit) \
|
---|
523 | continue; \
|
---|
524 | \
|
---|
525 | if (addr + size <= dtrace_toxrange[i].dtt_base) \
|
---|
526 | continue; \
|
---|
527 | \
|
---|
528 | /* \
|
---|
529 | * This address falls within a toxic region; return 0. \
|
---|
530 | */ \
|
---|
531 | me = VBDT_GET_CPUID(); \
|
---|
532 | cpu_core[me].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; \
|
---|
533 | cpu_core[me].cpuc_dtrace_illval = addr; \
|
---|
534 | return (0); \
|
---|
535 | } \
|
---|
536 | \
|
---|
537 | rc = RTR0MemKernelCopyFrom(&rval, (void const *)addr, size); \
|
---|
538 | if (RT_SUCCESS(rc)) \
|
---|
539 | return rval; \
|
---|
540 | \
|
---|
541 | /* \
|
---|
542 | * If not supported, pray it won't fault... \
|
---|
543 | */ \
|
---|
544 | if (rc == VERR_NOT_SUPPORTED) \
|
---|
545 | return *(uint##bits##_t const *)addr; \
|
---|
546 | \
|
---|
547 | me = VBDT_GET_CPUID(); \
|
---|
548 | cpu_core[me].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; \
|
---|
549 | cpu_core[me].cpuc_dtrace_illval = addr; \
|
---|
550 | return (0); \
|
---|
551 | }
|
---|
552 |
|
---|
553 | #endif /* VBOX */
|
---|
554 |
|
---|
555 | #ifdef _LP64
|
---|
556 | #define dtrace_loadptr dtrace_load64
|
---|
557 | #else
|
---|
558 | #define dtrace_loadptr dtrace_load32
|
---|
559 | #endif
|
---|
560 |
|
---|
561 | #define DTRACE_DYNHASH_FREE 0
|
---|
562 | #define DTRACE_DYNHASH_SINK 1
|
---|
563 | #define DTRACE_DYNHASH_VALID 2
|
---|
564 |
|
---|
565 | #define DTRACE_MATCH_FAIL -1
|
---|
566 | #define DTRACE_MATCH_NEXT 0
|
---|
567 | #define DTRACE_MATCH_DONE 1
|
---|
568 | #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
|
---|
569 | #define DTRACE_STATE_ALIGN 64
|
---|
570 |
|
---|
571 | #define DTRACE_FLAGS2FLT(flags) \
|
---|
572 | (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
|
---|
573 | ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
|
---|
574 | ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
|
---|
575 | ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
|
---|
576 | ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
|
---|
577 | ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
|
---|
578 | ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
|
---|
579 | ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
|
---|
580 | ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
|
---|
581 | DTRACEFLT_UNKNOWN)
|
---|
582 |
|
---|
583 | #define DTRACEACT_ISSTRING(act) \
|
---|
584 | ((act)->dta_kind == DTRACEACT_DIFEXPR && \
|
---|
585 | (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
|
---|
586 |
|
---|
587 | static size_t dtrace_strlen(const char *, size_t);
|
---|
588 | static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
|
---|
589 | static void dtrace_enabling_provide(dtrace_provider_t *);
|
---|
590 | static int dtrace_enabling_match(dtrace_enabling_t *, int *);
|
---|
591 | static void dtrace_enabling_matchall(void);
|
---|
592 | static dtrace_state_t *dtrace_anon_grab(void);
|
---|
593 | #ifndef VBOX
|
---|
594 | static uint64_t dtrace_helper(int, dtrace_mstate_t *,
|
---|
595 | dtrace_state_t *, uint64_t, uint64_t);
|
---|
596 | static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
|
---|
597 | #endif
|
---|
598 | static void dtrace_buffer_drop(dtrace_buffer_t *);
|
---|
599 | static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
|
---|
600 | dtrace_state_t *, dtrace_mstate_t *);
|
---|
601 | static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
|
---|
602 | dtrace_optval_t);
|
---|
603 | static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
|
---|
604 | #ifndef VBOX
|
---|
605 | static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
|
---|
606 | #endif
|
---|
607 |
|
---|
608 | /*
|
---|
609 | * DTrace Probe Context Functions
|
---|
610 | *
|
---|
611 | * These functions are called from probe context. Because probe context is
|
---|
612 | * any context in which C may be called, arbitrarily locks may be held,
|
---|
613 | * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
|
---|
614 | * As a result, functions called from probe context may only call other DTrace
|
---|
615 | * support functions -- they may not interact at all with the system at large.
|
---|
616 | * (Note that the ASSERT macro is made probe-context safe by redefining it in
|
---|
617 | * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
|
---|
618 | * loads are to be performed from probe context, they _must_ be in terms of
|
---|
619 | * the safe dtrace_load*() variants.
|
---|
620 | *
|
---|
621 | * Some functions in this block are not actually called from probe context;
|
---|
622 | * for these functions, there will be a comment above the function reading
|
---|
623 | * "Note: not called from probe context."
|
---|
624 | */
|
---|
/*
 * Panic with a printf-style formatted message; a thin variadic wrapper
 * that forwards the argument list to dtrace_vpanic() (defined elsewhere).
 * Does not return (dtrace_vpanic() panics the system).
 */
void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
	va_end(alist);
}
|
---|
634 |
|
---|
635 | #ifndef VBOX /* We have our own assertion machinery. */
|
---|
/*
 * Assertion-failure handler: ASSERT() is redefined in terms of this
 * probe-context-safe function.  Panics with the failed expression,
 * file and line; the trailing load is unreachable in practice.
 */
int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}
|
---|
647 | #endif
|
---|
648 |
|
---|
/*
 * Atomically increment a specified error counter from probe context.
 * The counter is advanced with a 32-bit compare-and-swap so that
 * concurrent increments from multiple CPUs are never lost.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	/* Retry until our compare-and-swap wins over concurrent updaters. */
	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}
|
---|
687 |
|
---|
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.  Each expansion defines
 * a dtrace_loadN() routine used for loads from probe context (see the
 * macro's definition for the exact semantics).
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
|
---|
696 |
|
---|
697 | static int
|
---|
698 | dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
|
---|
699 | {
|
---|
700 | if (dest < mstate->dtms_scratch_base)
|
---|
701 | return (0);
|
---|
702 |
|
---|
703 | if (dest + size < dest)
|
---|
704 | return (0);
|
---|
705 |
|
---|
706 | if (dest + size > mstate->dtms_scratch_ptr)
|
---|
707 | return (0);
|
---|
708 |
|
---|
709 | return (1);
|
---|
710 | }
|
---|
711 |
|
---|
712 | static int
|
---|
713 | dtrace_canstore_statvar(uint64_t addr, size_t sz,
|
---|
714 | dtrace_statvar_t **svars, int nsvars)
|
---|
715 | {
|
---|
716 | int i;
|
---|
717 |
|
---|
718 | for (i = 0; i < nsvars; i++) {
|
---|
719 | dtrace_statvar_t *svar = svars[i];
|
---|
720 |
|
---|
721 | if (svar == NULL || svar->dtsv_size == 0)
|
---|
722 | continue;
|
---|
723 |
|
---|
724 | if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size))
|
---|
725 | return (1);
|
---|
726 | }
|
---|
727 |
|
---|
728 | return (0);
|
---|
729 | }
|
---|
730 |
|
---|
/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 * Returns 1 if the entire [addr, addr + sz) range is storable, 0 otherwise.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size))
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		/* base: first byte past the hash table at the bottom of the space */
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state.  For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 *
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		return (1);
	}

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}
|
---|
804 |
|
---|
805 |
|
---|
/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 *
 * Returns 1 if the load is permitted; otherwise raises the per-CPU
 * CPU_DTRACE_KPRIV flag, records the faulting address in
 * cpuc_dtrace_illval, and returns 0.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	volatile uintptr_t *illval = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore(addr, sz, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 * NOTE(review): mstate->dtms_difo is dereferenced without a NULL
	 * check here -- presumably callers only reach this path with a DIF
	 * object in hand; verify against call sites.
	 */
	if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen))
		return (1);

	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}
|
---|
845 |
|
---|
846 | /*
|
---|
847 | * Convenience routine to check to see if a given string is within a memory
|
---|
848 | * region in which a load may be issued given the user's privilege level;
|
---|
849 | * this exists so that we don't need to issue unnecessary dtrace_strlen()
|
---|
850 | * calls in the event that the user has all privileges.
|
---|
851 | */
|
---|
852 | static int
|
---|
853 | dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
|
---|
854 | dtrace_vstate_t *vstate)
|
---|
855 | {
|
---|
856 | size_t strsz;
|
---|
857 |
|
---|
858 | /*
|
---|
859 | * If we hold the privilege to read from kernel memory, then
|
---|
860 | * everything is readable.
|
---|
861 | */
|
---|
862 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
|
---|
863 | return (1);
|
---|
864 |
|
---|
865 | strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz);
|
---|
866 | if (dtrace_canload(addr, strsz, mstate, vstate))
|
---|
867 | return (1);
|
---|
868 |
|
---|
869 | return (0);
|
---|
870 | }
|
---|
871 |
|
---|
/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 * The type must be a by-reference (DIF_TF_BYREF) type; for strings the
 * checked size is the (option-bounded) string length plus the terminating
 * NUL, while for other types it is the declared type size.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);

	if (type->dtdt_kind == DIF_TYPE_STRING)
		sz = dtrace_strlen(src,
		    vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
	else
		sz = type->dtdt_size;

	return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}
|
---|
898 |
|
---|
/*
 * Compare two strings using safe loads.  Semantics follow strncmp(), with
 * two probe-context twists: a NULL pointer is treated as an empty string,
 * and the comparison is abandoned (returning 0) once the per-CPU
 * CPU_DTRACE_FAULT flag is raised.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	/* Per-CPU fault flags; checked each iteration so a faulting load aborts. */
	flags = (volatile uint16_t *)&cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}
|
---|
932 |
|
---|
933 | /*
|
---|
934 | * Compute strlen(s) for a string using safe memory accesses. The additional
|
---|
 * lim parameter is used to specify a maximum length to ensure completion.
|
---|
936 | */
|
---|
937 | static size_t
|
---|
938 | dtrace_strlen(const char *s, size_t lim)
|
---|
939 | {
|
---|
940 | uint_t len;
|
---|
941 |
|
---|
942 | for (len = 0; len != lim; len++) {
|
---|
943 | if (dtrace_load8((uintptr_t)s++) == '\0')
|
---|
944 | break;
|
---|
945 | }
|
---|
946 |
|
---|
947 | return (len);
|
---|
948 | }
|
---|
949 |
|
---|
/*
 * Check if any part of [kaddr, kaddr + size) overlaps a registered toxic
 * region.  On overlap, set CPU_DTRACE_BADADDR, record the offending address
 * in the per-CPU illegal-value slot and return 1; otherwise return 0.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		/*
		 * Unsigned wrap-around trick: kaddr - taddr < tsize iff
		 * kaddr lies within [taddr, taddr + tsize).
		 */
		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		/*
		 * Symmetric check: the toxic base falling within
		 * [kaddr, kaddr + size) catches the remaining overlaps.
		 */
		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}
|
---|
978 |
|
---|
/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			/*
			 * dst precedes (or equals) src: a forward copy
			 * cannot clobber source bytes not yet read.
			 */
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			/* dst follows src: copy backwards from the end. */
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
|
---|
1006 |
|
---|
1007 | /*
|
---|
1008 | * Copy src to dst using safe memory accesses, up to either the specified
|
---|
1009 | * length, or the point that a nul byte is encountered. The src is assumed to
|
---|
1010 | * be unsafe memory specified by the DIF program. The dst is assumed to be
|
---|
1011 | * safe memory that we can store to directly because it is managed by DTrace.
|
---|
1012 | * Unlike dtrace_bcopy(), overlapping regions are not handled.
|
---|
1013 | */
|
---|
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *out = dst, ch;
		const uint8_t *in = src;

		/* Copy until the NUL has been written or len bytes are done. */
		do {
			ch = dtrace_load8((uintptr_t)in++);
			*out++ = ch;
		} while (--len != 0 && ch != '\0');
	}
}
|
---|
1026 |
|
---|
1027 | /*
|
---|
1028 | * Copy src to dst, deriving the size and type from the specified (BYREF)
|
---|
1029 | * variable type. The src is assumed to be unsafe memory specified by the DIF
|
---|
1030 | * program. The dst is assumed to be DTrace variable memory that is of the
|
---|
1031 | * specified type; we assume that we can store to directly.
|
---|
1032 | */
|
---|
1033 | static void
|
---|
1034 | dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
|
---|
1035 | {
|
---|
1036 | ASSERT(type->dtdt_flags & DIF_TF_BYREF);
|
---|
1037 |
|
---|
1038 | if (type->dtdt_kind == DIF_TYPE_STRING) {
|
---|
1039 | dtrace_strcpy(src, dst, type->dtdt_size);
|
---|
1040 | } else {
|
---|
1041 | dtrace_bcopy(src, dst, type->dtdt_size);
|
---|
1042 | }
|
---|
1043 | }
|
---|
1044 |
|
---|
/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 * Returns 0 on equality (or when a fault aborts the scan), 1 on the first
 * observed difference; a NULL argument compares equal only to itself.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		/* Only s1 needs the safe load; s2 is DTrace-managed. */
		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}
|
---|
1074 |
|
---|
1075 | /*
|
---|
1076 | * Zero the specified region using a simple byte-by-byte loop. Note that this
|
---|
1077 | * is for safe DTrace-managed memory only.
|
---|
1078 | */
|
---|
1079 | static void
|
---|
1080 | dtrace_bzero(void *dst, size_t len)
|
---|
1081 | {
|
---|
1082 | uchar_t *cp;
|
---|
1083 |
|
---|
1084 | for (cp = dst; len != 0; len--)
|
---|
1085 | *cp++ = 0;
|
---|
1086 | }
|
---|
1087 |
|
---|
/*
 * 128-bit add: sum = addend1 + addend2, where each 128-bit value is a pair
 * of uint64_ts with element 0 the low word.  The result is staged in locals
 * before being stored, so sum may alias either addend.
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	uint64_t carry = (lo < addend1[0] || lo < addend2[0]) ? 1 : 0;
	uint64_t hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
|
---|
1100 |
|
---|
1101 | /*
|
---|
1102 | * Shift the 128-bit value in a by b. If b is positive, shift left.
|
---|
1103 | * If b is negative, shift right.
|
---|
1104 | */
|
---|
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	if (b == 0)
		return;

	if (b < 0) {
		/* Right shift by -b bits. */
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			/*
			 * Build the mask with an unsigned constant:
			 * 1LL << 63 (the b == 1 case) would left-shift a
			 * signed value into the sign bit, which is
			 * undefined behavior in C (C11 6.5.7p4).
			 */
			mask = (1ULL << (64 - b)) - 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		/* Left shift by b bits. */
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}
|
---|
1137 |
|
---|
1138 | /*
|
---|
1139 | * The basic idea is to break the 2 64-bit values into 4 32-bit values,
|
---|
1140 | * use native multiplication on those, and then re-combine into the
|
---|
1141 | * resulting 128-bit value.
|
---|
1142 | *
|
---|
1143 | * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
|
---|
1144 | * hi1 * hi2 << 64 +
|
---|
1145 | * hi1 * lo2 << 32 +
|
---|
1146 | * hi2 * lo1 << 32 +
|
---|
1147 | * lo1 * lo2
|
---|
1148 | */
|
---|
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	/* Split each factor into 32-bit halves. */
	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	/* lo*lo seeds the low word, hi*hi the high word (the << 64 term). */
	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	/* Add in the two << 32 cross terms with full 128-bit carry. */
	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}
|
---|
1174 |
|
---|
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials.
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	/*
	 * All six IDs of the current credential -- effective, real and
	 * saved, for both user and group -- must match the enabling
	 * credential's effective uid/gid.
	 */
	if ((cr = CRED()) != NULL &&
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}
|
---|
1202 |
|
---|
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials.
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	/* Compare zone pointers directly; same zone iff same zone object. */
	if ((cr = CRED()) != NULL &&
	    s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
}
|
---|
1225 |
|
---|
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 * In the VBox build there is no backing process model, so the check
 * trivially succeeds.
 */
static int
dtrace_priv_proc_common_nocd(VBDTVOID)
{
#ifndef VBOX
	proc_t *proc;

	/* SNOCD flags a process that has setuid or changed credentials. */
	if ((proc = VBDT_GET_PROC()) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
#else
	return (1);
#endif
}
|
---|
1245 |
|
---|
#ifndef VBOX
/*
 * Check whether destructive actions against user processes are permitted
 * for this enabling: unless the corresponding ALLZONE/ALLUSER/CREDCHG
 * destructive-action bits are set, the zone, user-credential and
 * no-credential-change checks must each pass.  On failure the per-CPU
 * CPU_DTRACE_UPRIV flag is raised and 0 is returned.
 */
static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
#endif /* !VBOX */
|
---|
1272 |
|
---|
1273 | static int
|
---|
1274 | dtrace_priv_proc_control(dtrace_state_t *state)
|
---|
1275 | {
|
---|
1276 | if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
|
---|
1277 | return (1);
|
---|
1278 |
|
---|
1279 | if (dtrace_priv_proc_common_zone(state) &&
|
---|
1280 | dtrace_priv_proc_common_user(state) &&
|
---|
1281 | dtrace_priv_proc_common_nocd())
|
---|
1282 | return (1);
|
---|
1283 |
|
---|
1284 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
|
---|
1285 |
|
---|
1286 | return (0);
|
---|
1287 | }
|
---|
1288 |
|
---|
1289 | static int
|
---|
1290 | dtrace_priv_proc(dtrace_state_t *state)
|
---|
1291 | {
|
---|
1292 | if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
|
---|
1293 | return (1);
|
---|
1294 |
|
---|
1295 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
|
---|
1296 |
|
---|
1297 | return (0);
|
---|
1298 | }
|
---|
1299 |
|
---|
1300 | static int
|
---|
1301 | dtrace_priv_kernel(dtrace_state_t *state)
|
---|
1302 | {
|
---|
1303 | if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
|
---|
1304 | return (1);
|
---|
1305 |
|
---|
1306 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
|
---|
1307 |
|
---|
1308 | return (0);
|
---|
1309 | }
|
---|
1310 |
|
---|
1311 | static int
|
---|
1312 | dtrace_priv_kernel_destructive(dtrace_state_t *state)
|
---|
1313 | {
|
---|
1314 | if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
|
---|
1315 | return (1);
|
---|
1316 |
|
---|
1317 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
|
---|
1318 |
|
---|
1319 | return (0);
|
---|
1320 | }
|
---|
1321 |
|
---|
1322 | /*
|
---|
1323 | * Note: not called from probe context. This function is called
|
---|
1324 | * asynchronously (and at a regular interval) from outside of probe context to
|
---|
1325 | * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
|
---|
1326 | * cleaning is explained in detail in <sys/dtrace_impl.h>.
|
---|
1327 | */
|
---|
VBDTSTATIC void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	dtrace_dynvar_t **rinsep;
	int i, j, work = 0;	/* work: set once any dirty list was moved aside */

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];
		/* By default, CPU i's dirty list rinses on its own rinsing list. */
		rinsep = &dcpu->dtdsc_rinsing;

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		if (dcpu->dtdsc_rinsing != NULL) {
			/*
			 * If the rinsing list is non-NULL, then it is because
			 * this CPU was selected to accept another CPU's
			 * dirty list -- and since that time, dirty buffers
			 * have accumulated.  This is a highly unlikely
			 * condition, but we choose to ignore the dirty
			 * buffers -- they'll be picked up a future cleanse.
			 */
			continue;
		}

		if (dcpu->dtdsc_clean != NULL) {
			/*
			 * If the clean list is non-NULL, then we're in a
			 * situation where a CPU has done deallocations (we
			 * have a non-NULL dirty list) but no allocations (we
			 * also have a non-NULL clean list).  We can't simply
			 * move the dirty list into the clean list on this
			 * CPU, yet we also don't want to allow this condition
			 * to persist, lest a short clean list prevent a
			 * massive dirty list from being cleaned (which in
			 * turn could lead to otherwise avoidable dynamic
			 * drops).  To deal with this, we look for some CPU
			 * with a NULL clean list, NULL dirty list, and NULL
			 * rinsing list -- and then we borrow this CPU to
			 * rinse our dirty list.
			 */
			for (j = 0; j < NCPU; j++) {
				dtrace_dstate_percpu_t *rinser;

				rinser = &dstate->dtds_percpu[j];

				if (rinser->dtdsc_rinsing != NULL)
					continue;

				if (rinser->dtdsc_dirty != NULL)
					continue;

				if (rinser->dtdsc_clean != NULL)
					continue;

				rinsep = &rinser->dtdsc_rinsing;
				break;
			}

			if (j == NCPU) {
				/*
				 * We were unable to find another CPU that
				 * could accept this dirty list -- we are
				 * therefore unable to clean it now.
				 */
				dtrace_dynvar_failclean++;
				continue;
			}
		}

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			*rinsep = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	for (i = 0; i < NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}
|
---|
1459 |
|
---|
1460 | /*
|
---|
1461 | * Depending on the value of the op parameter, this function looks-up,
|
---|
1462 | * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
|
---|
1463 | * allocation is requested, this function will return a pointer to a
|
---|
1464 | * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
|
---|
1465 | * variable can be allocated. If NULL is returned, the appropriate counter
|
---|
1466 | * will be incremented.
|
---|
1467 | */
|
---|
1468 | VBDTSTATIC dtrace_dynvar_t *
|
---|
1469 | dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
|
---|
1470 | dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
|
---|
1471 | dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
|
---|
1472 | {
|
---|
1473 | uint64_t hashval = DTRACE_DYNHASH_VALID;
|
---|
1474 | dtrace_dynhash_t *hash = dstate->dtds_hash;
|
---|
1475 | dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
|
---|
1476 | processorid_t me = VBDT_GET_CPUID(), cpu = me;
|
---|
1477 | dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
|
---|
1478 | size_t bucket, ksize;
|
---|
1479 | size_t chunksize = dstate->dtds_chunksize;
|
---|
1480 | uintptr_t kdata, lock, nstate;
|
---|
1481 | uint_t i;
|
---|
1482 |
|
---|
1483 | ASSERT(nkeys != 0);
|
---|
1484 |
|
---|
1485 | /*
|
---|
1486 | * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
|
---|
1487 | * algorithm. For the by-value portions, we perform the algorithm in
|
---|
1488 | * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
|
---|
1489 | * bit, and seems to have only a minute effect on distribution. For
|
---|
1490 | * the by-reference data, we perform "One-at-a-time" iterating (safely)
|
---|
1491 | * over each referenced byte. It's painful to do this, but it's much
|
---|
1492 | * better than pathological hash distribution. The efficacy of the
|
---|
1493 | * hashing algorithm (and a comparison with other algorithms) may be
|
---|
1494 | * found by running the ::dtrace_dynstat MDB dcmd.
|
---|
1495 | */
|
---|
1496 | for (i = 0; i < nkeys; i++) {
|
---|
1497 | if (key[i].dttk_size == 0) {
|
---|
1498 | uint64_t val = key[i].dttk_value;
|
---|
1499 |
|
---|
1500 | hashval += (val >> 48) & 0xffff;
|
---|
1501 | hashval += (hashval << 10);
|
---|
1502 | hashval ^= (hashval >> 6);
|
---|
1503 |
|
---|
1504 | hashval += (val >> 32) & 0xffff;
|
---|
1505 | hashval += (hashval << 10);
|
---|
1506 | hashval ^= (hashval >> 6);
|
---|
1507 |
|
---|
1508 | hashval += (val >> 16) & 0xffff;
|
---|
1509 | hashval += (hashval << 10);
|
---|
1510 | hashval ^= (hashval >> 6);
|
---|
1511 |
|
---|
1512 | hashval += val & 0xffff;
|
---|
1513 | hashval += (hashval << 10);
|
---|
1514 | hashval ^= (hashval >> 6);
|
---|
1515 | } else {
|
---|
1516 | /*
|
---|
1517 | * This is incredibly painful, but it beats the hell
|
---|
1518 | * out of the alternative.
|
---|
1519 | */
|
---|
1520 | uint64_t j, size = key[i].dttk_size;
|
---|
1521 | uintptr_t base = (uintptr_t)key[i].dttk_value;
|
---|
1522 |
|
---|
1523 | if (!dtrace_canload(base, size, mstate, vstate))
|
---|
1524 | break;
|
---|
1525 |
|
---|
1526 | for (j = 0; j < size; j++) {
|
---|
1527 | hashval += dtrace_load8(base + j);
|
---|
1528 | hashval += (hashval << 10);
|
---|
1529 | hashval ^= (hashval >> 6);
|
---|
1530 | }
|
---|
1531 | }
|
---|
1532 | }
|
---|
1533 |
|
---|
1534 | if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
|
---|
1535 | return (NULL);
|
---|
1536 |
|
---|
1537 | hashval += (hashval << 3);
|
---|
1538 | hashval ^= (hashval >> 11);
|
---|
1539 | hashval += (hashval << 15);
|
---|
1540 |
|
---|
1541 | /*
|
---|
1542 | * There is a remote chance (ideally, 1 in 2^31) that our hashval
|
---|
1543 | * comes out to be one of our two sentinel hash values. If this
|
---|
1544 | * actually happens, we set the hashval to be a value known to be a
|
---|
1545 | * non-sentinel value.
|
---|
1546 | */
|
---|
1547 | if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
|
---|
1548 | hashval = DTRACE_DYNHASH_VALID;
|
---|
1549 |
|
---|
1550 | /*
|
---|
1551 | * Yes, it's painful to do a divide here. If the cycle count becomes
|
---|
1552 | * important here, tricks can be pulled to reduce it. (However, it's
|
---|
1553 | * critical that hash collisions be kept to an absolute minimum;
|
---|
1554 | * they're much more painful than a divide.) It's better to have a
|
---|
1555 | * solution that generates few collisions and still keeps things
|
---|
1556 | * relatively simple.
|
---|
1557 | */
|
---|
1558 | bucket = hashval % dstate->dtds_hashsize;
|
---|
1559 |
|
---|
1560 | if (op == DTRACE_DYNVAR_DEALLOC) {
|
---|
1561 | volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
|
---|
1562 |
|
---|
1563 | for (;;) {
|
---|
1564 | while ((lock = *lockp) & 1)
|
---|
1565 | continue;
|
---|
1566 |
|
---|
1567 | if (dtrace_casptr((void *)lockp,
|
---|
1568 | (void *)lock, (void *)(lock + 1)) == (void *)lock)
|
---|
1569 | break;
|
---|
1570 | }
|
---|
1571 |
|
---|
1572 | dtrace_membar_producer();
|
---|
1573 | }
|
---|
1574 |
|
---|
1575 | top:
|
---|
1576 | prev = NULL;
|
---|
1577 | lock = hash[bucket].dtdh_lock;
|
---|
1578 |
|
---|
1579 | dtrace_membar_consumer();
|
---|
1580 |
|
---|
1581 | start = hash[bucket].dtdh_chain;
|
---|
1582 | ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
|
---|
1583 | start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
|
---|
1584 | op != DTRACE_DYNVAR_DEALLOC));
|
---|
1585 |
|
---|
1586 | for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
|
---|
1587 | dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
|
---|
1588 | dtrace_key_t *dkey = &dtuple->dtt_key[0];
|
---|
1589 |
|
---|
1590 | if (dvar->dtdv_hashval != hashval) {
|
---|
1591 | if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
|
---|
1592 | /*
|
---|
1593 | * We've reached the sink, and therefore the
|
---|
1594 | * end of the hash chain; we can kick out of
|
---|
1595 | * the loop knowing that we have seen a valid
|
---|
1596 | * snapshot of state.
|
---|
1597 | */
|
---|
1598 | ASSERT(dvar->dtdv_next == NULL);
|
---|
1599 | ASSERT(dvar == &dtrace_dynhash_sink);
|
---|
1600 | break;
|
---|
1601 | }
|
---|
1602 |
|
---|
1603 | if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
|
---|
1604 | /*
|
---|
1605 | * We've gone off the rails: somewhere along
|
---|
1606 | * the line, one of the members of this hash
|
---|
1607 | * chain was deleted. Note that we could also
|
---|
1608 | * detect this by simply letting this loop run
|
---|
1609 | * to completion, as we would eventually hit
|
---|
1610 | * the end of the dirty list. However, we
|
---|
1611 | * want to avoid running the length of the
|
---|
1612 | * dirty list unnecessarily (it might be quite
|
---|
1613 | * long), so we catch this as early as
|
---|
1614 | * possible by detecting the hash marker. In
|
---|
1615 | * this case, we simply set dvar to NULL and
|
---|
1616 | * break; the conditional after the loop will
|
---|
1617 | * send us back to top.
|
---|
1618 | */
|
---|
1619 | dvar = NULL;
|
---|
1620 | break;
|
---|
1621 | }
|
---|
1622 |
|
---|
1623 | goto next;
|
---|
1624 | }
|
---|
1625 |
|
---|
1626 | if (dtuple->dtt_nkeys != nkeys)
|
---|
1627 | goto next;
|
---|
1628 |
|
---|
1629 | for (i = 0; i < nkeys; i++, dkey++) {
|
---|
1630 | if (dkey->dttk_size != key[i].dttk_size)
|
---|
1631 | goto next; /* size or type mismatch */
|
---|
1632 |
|
---|
1633 | if (dkey->dttk_size != 0) {
|
---|
1634 | if (dtrace_bcmp(
|
---|
1635 | (void *)(uintptr_t)key[i].dttk_value,
|
---|
1636 | (void *)(uintptr_t)dkey->dttk_value,
|
---|
1637 | dkey->dttk_size))
|
---|
1638 | goto next;
|
---|
1639 | } else {
|
---|
1640 | if (dkey->dttk_value != key[i].dttk_value)
|
---|
1641 | goto next;
|
---|
1642 | }
|
---|
1643 | }
|
---|
1644 |
|
---|
1645 | if (op != DTRACE_DYNVAR_DEALLOC)
|
---|
1646 | return (dvar);
|
---|
1647 |
|
---|
1648 | ASSERT(dvar->dtdv_next == NULL ||
|
---|
1649 | dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
|
---|
1650 |
|
---|
1651 | if (prev != NULL) {
|
---|
1652 | ASSERT(hash[bucket].dtdh_chain != dvar);
|
---|
1653 | ASSERT(start != dvar);
|
---|
1654 | ASSERT(prev->dtdv_next == dvar);
|
---|
1655 | prev->dtdv_next = dvar->dtdv_next;
|
---|
1656 | } else {
|
---|
1657 | if (dtrace_casptr(&hash[bucket].dtdh_chain,
|
---|
1658 | start, dvar->dtdv_next) != start) {
|
---|
1659 | /*
|
---|
1660 | * We have failed to atomically swing the
|
---|
1661 | * hash table head pointer, presumably because
|
---|
1662 | * of a conflicting allocation on another CPU.
|
---|
1663 | * We need to reread the hash chain and try
|
---|
1664 | * again.
|
---|
1665 | */
|
---|
1666 | goto top;
|
---|
1667 | }
|
---|
1668 | }
|
---|
1669 |
|
---|
1670 | dtrace_membar_producer();
|
---|
1671 |
|
---|
1672 | /*
|
---|
1673 | * Now set the hash value to indicate that it's free.
|
---|
1674 | */
|
---|
1675 | ASSERT(hash[bucket].dtdh_chain != dvar);
|
---|
1676 | dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
|
---|
1677 |
|
---|
1678 | dtrace_membar_producer();
|
---|
1679 |
|
---|
1680 | /*
|
---|
1681 | * Set the next pointer to point at the dirty list, and
|
---|
1682 | * atomically swing the dirty pointer to the newly freed dvar.
|
---|
1683 | */
|
---|
1684 | do {
|
---|
1685 | next = dcpu->dtdsc_dirty;
|
---|
1686 | dvar->dtdv_next = next;
|
---|
1687 | } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
|
---|
1688 |
|
---|
1689 | /*
|
---|
1690 | * Finally, unlock this hash bucket.
|
---|
1691 | */
|
---|
1692 | ASSERT(hash[bucket].dtdh_lock == lock);
|
---|
1693 | ASSERT(lock & 1);
|
---|
1694 | hash[bucket].dtdh_lock++;
|
---|
1695 |
|
---|
1696 | return (NULL);
|
---|
1697 | next:
|
---|
1698 | prev = dvar;
|
---|
1699 | continue;
|
---|
1700 | }
|
---|
1701 |
|
---|
1702 | if (dvar == NULL) {
|
---|
1703 | /*
|
---|
1704 | * If dvar is NULL, it is because we went off the rails:
|
---|
1705 | * one of the elements that we traversed in the hash chain
|
---|
1706 | * was deleted while we were traversing it. In this case,
|
---|
1707 | * we assert that we aren't doing a dealloc (deallocs lock
|
---|
1708 | * the hash bucket to prevent themselves from racing with
|
---|
1709 | * one another), and retry the hash chain traversal.
|
---|
1710 | */
|
---|
1711 | ASSERT(op != DTRACE_DYNVAR_DEALLOC);
|
---|
1712 | goto top;
|
---|
1713 | }
|
---|
1714 |
|
---|
1715 | if (op != DTRACE_DYNVAR_ALLOC) {
|
---|
1716 | /*
|
---|
1717 | * If we are not to allocate a new variable, we want to
|
---|
1718 | * return NULL now. Before we return, check that the value
|
---|
1719 | * of the lock word hasn't changed. If it has, we may have
|
---|
1720 | * seen an inconsistent snapshot.
|
---|
1721 | */
|
---|
1722 | if (op == DTRACE_DYNVAR_NOALLOC) {
|
---|
1723 | if (hash[bucket].dtdh_lock != lock)
|
---|
1724 | goto top;
|
---|
1725 | } else {
|
---|
1726 | ASSERT(op == DTRACE_DYNVAR_DEALLOC);
|
---|
1727 | ASSERT(hash[bucket].dtdh_lock == lock);
|
---|
1728 | ASSERT(lock & 1);
|
---|
1729 | hash[bucket].dtdh_lock++;
|
---|
1730 | }
|
---|
1731 |
|
---|
1732 | return (NULL);
|
---|
1733 | }
|
---|
1734 |
|
---|
1735 | /*
|
---|
1736 | * We need to allocate a new dynamic variable. The size we need is the
|
---|
1737 | * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
|
---|
1738 | * size of any auxiliary key data (rounded up to 8-byte alignment) plus
|
---|
1739 | * the size of any referred-to data (dsize). We then round the final
|
---|
1740 | * size up to the chunksize for allocation.
|
---|
1741 | */
|
---|
1742 | for (ksize = 0, i = 0; i < nkeys; i++)
|
---|
1743 | ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
|
---|
1744 |
|
---|
1745 | /*
|
---|
1746 | * This should be pretty much impossible, but could happen if, say,
|
---|
1747 | * strange DIF specified the tuple. Ideally, this should be an
|
---|
1748 | * assertion and not an error condition -- but that requires that the
|
---|
1749 | * chunksize calculation in dtrace_difo_chunksize() be absolutely
|
---|
1750 | * bullet-proof. (That is, it must not be able to be fooled by
|
---|
1751 | * malicious DIF.) Given the lack of backwards branches in DIF,
|
---|
1752 | * solving this would presumably not amount to solving the Halting
|
---|
1753 | * Problem -- but it still seems awfully hard.
|
---|
1754 | */
|
---|
1755 | if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
|
---|
1756 | ksize + dsize > chunksize) {
|
---|
1757 | dcpu->dtdsc_drops++;
|
---|
1758 | return (NULL);
|
---|
1759 | }
|
---|
1760 |
|
---|
1761 | nstate = DTRACE_DSTATE_EMPTY;
|
---|
1762 |
|
---|
1763 | do {
|
---|
1764 | retry:
|
---|
1765 | free = dcpu->dtdsc_free;
|
---|
1766 |
|
---|
1767 | if (free == NULL) {
|
---|
1768 | dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
|
---|
1769 | void *rval;
|
---|
1770 |
|
---|
1771 | if (clean == NULL) {
|
---|
1772 | /*
|
---|
1773 | * We're out of dynamic variable space on
|
---|
1774 | * this CPU. Unless we have tried all CPUs,
|
---|
1775 | * we'll try to allocate from a different
|
---|
1776 | * CPU.
|
---|
1777 | */
|
---|
1778 | switch (dstate->dtds_state) {
|
---|
1779 | case DTRACE_DSTATE_CLEAN: {
|
---|
1780 | void *sp = &dstate->dtds_state;
|
---|
1781 |
|
---|
1782 | if (++cpu >= NCPU)
|
---|
1783 | cpu = 0;
|
---|
1784 |
|
---|
1785 | if (dcpu->dtdsc_dirty != NULL &&
|
---|
1786 | nstate == DTRACE_DSTATE_EMPTY)
|
---|
1787 | nstate = DTRACE_DSTATE_DIRTY;
|
---|
1788 |
|
---|
1789 | if (dcpu->dtdsc_rinsing != NULL)
|
---|
1790 | nstate = DTRACE_DSTATE_RINSING;
|
---|
1791 |
|
---|
1792 | dcpu = &dstate->dtds_percpu[cpu];
|
---|
1793 |
|
---|
1794 | if (cpu != me)
|
---|
1795 | goto retry;
|
---|
1796 |
|
---|
1797 | (void) dtrace_cas32(sp,
|
---|
1798 | DTRACE_DSTATE_CLEAN, nstate);
|
---|
1799 |
|
---|
1800 | /*
|
---|
1801 | * To increment the correct bean
|
---|
1802 | * counter, take another lap.
|
---|
1803 | */
|
---|
1804 | goto retry;
|
---|
1805 | }
|
---|
1806 |
|
---|
1807 | case DTRACE_DSTATE_DIRTY:
|
---|
1808 | dcpu->dtdsc_dirty_drops++;
|
---|
1809 | break;
|
---|
1810 |
|
---|
1811 | case DTRACE_DSTATE_RINSING:
|
---|
1812 | dcpu->dtdsc_rinsing_drops++;
|
---|
1813 | break;
|
---|
1814 |
|
---|
1815 | case DTRACE_DSTATE_EMPTY:
|
---|
1816 | dcpu->dtdsc_drops++;
|
---|
1817 | break;
|
---|
1818 | }
|
---|
1819 |
|
---|
1820 | DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
|
---|
1821 | return (NULL);
|
---|
1822 | }
|
---|
1823 |
|
---|
1824 | /*
|
---|
1825 | * The clean list appears to be non-empty. We want to
|
---|
1826 | * move the clean list to the free list; we start by
|
---|
1827 | * moving the clean pointer aside.
|
---|
1828 | */
|
---|
1829 | if (dtrace_casptr(&dcpu->dtdsc_clean,
|
---|
1830 | clean, NULL) != clean) {
|
---|
1831 | /*
|
---|
1832 | * We are in one of two situations:
|
---|
1833 | *
|
---|
1834 | * (a) The clean list was switched to the
|
---|
1835 | * free list by another CPU.
|
---|
1836 | *
|
---|
1837 | * (b) The clean list was added to by the
|
---|
1838 | * cleansing cyclic.
|
---|
1839 | *
|
---|
1840 | * In either of these situations, we can
|
---|
1841 | * just reattempt the free list allocation.
|
---|
1842 | */
|
---|
1843 | goto retry;
|
---|
1844 | }
|
---|
1845 |
|
---|
1846 | ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
|
---|
1847 |
|
---|
1848 | /*
|
---|
1849 | * Now we'll move the clean list to our free list.
|
---|
1850 | * It's impossible for this to fail: the only way
|
---|
1851 | * the free list can be updated is through this
|
---|
1852 | * code path, and only one CPU can own the clean list.
|
---|
1853 | * Thus, it would only be possible for this to fail if
|
---|
1854 | * this code were racing with dtrace_dynvar_clean().
|
---|
1855 | * (That is, if dtrace_dynvar_clean() updated the clean
|
---|
1856 | * list, and we ended up racing to update the free
|
---|
1857 | * list.) This race is prevented by the dtrace_sync()
|
---|
1858 | * in dtrace_dynvar_clean() -- which flushes the
|
---|
1859 | * owners of the clean lists out before resetting
|
---|
1860 | * the clean lists.
|
---|
1861 | */
|
---|
1862 | dcpu = &dstate->dtds_percpu[me];
|
---|
1863 | rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
|
---|
1864 | ASSERT(rval == NULL);
|
---|
1865 | goto retry;
|
---|
1866 | }
|
---|
1867 |
|
---|
1868 | dvar = free;
|
---|
1869 | new_free = dvar->dtdv_next;
|
---|
1870 | } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
|
---|
1871 |
|
---|
1872 | /*
|
---|
1873 | * We have now allocated a new chunk. We copy the tuple keys into the
|
---|
1874 | * tuple array and copy any referenced key data into the data space
|
---|
1875 | * following the tuple array. As we do this, we relocate dttk_value
|
---|
1876 | * in the final tuple to point to the key data address in the chunk.
|
---|
1877 | */
|
---|
1878 | kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
|
---|
1879 | dvar->dtdv_data = (void *)(kdata + ksize);
|
---|
1880 | dvar->dtdv_tuple.dtt_nkeys = nkeys;
|
---|
1881 |
|
---|
1882 | for (i = 0; i < nkeys; i++) {
|
---|
1883 | dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
|
---|
1884 | size_t kesize = key[i].dttk_size;
|
---|
1885 |
|
---|
1886 | if (kesize != 0) {
|
---|
1887 | dtrace_bcopy(
|
---|
1888 | (const void *)(uintptr_t)key[i].dttk_value,
|
---|
1889 | (void *)kdata, kesize);
|
---|
1890 | dkey->dttk_value = kdata;
|
---|
1891 | kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
|
---|
1892 | } else {
|
---|
1893 | dkey->dttk_value = key[i].dttk_value;
|
---|
1894 | }
|
---|
1895 |
|
---|
1896 | dkey->dttk_size = kesize;
|
---|
1897 | }
|
---|
1898 |
|
---|
1899 | ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
|
---|
1900 | dvar->dtdv_hashval = hashval;
|
---|
1901 | dvar->dtdv_next = start;
|
---|
1902 |
|
---|
1903 | if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
|
---|
1904 | return (dvar);
|
---|
1905 |
|
---|
1906 | /*
|
---|
1907 | * The cas has failed. Either another CPU is adding an element to
|
---|
1908 | * this hash chain, or another CPU is deleting an element from this
|
---|
1909 | * hash chain. The simplest way to deal with both of these cases
|
---|
1910 | * (though not necessarily the most efficient) is to free our
|
---|
1911 | * allocated block and tail-call ourselves. Note that the free is
|
---|
1912 | * to the dirty list and _not_ to the free list. This is to prevent
|
---|
1913 | * races with allocators, above.
|
---|
1914 | */
|
---|
1915 | dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
|
---|
1916 |
|
---|
1917 | dtrace_membar_producer();
|
---|
1918 |
|
---|
1919 | do {
|
---|
1920 | free = dcpu->dtdsc_dirty;
|
---|
1921 | dvar->dtdv_next = free;
|
---|
1922 | } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
|
---|
1923 |
|
---|
1924 | return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
|
---|
1925 | }
|
---|
1926 |
|
---|
/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	/*
	 * Keep the signed minimum of the values seen so far: min() compares
	 * as int64_t, so negative samples sort below positive ones.
	 */
	int64_t cur = (int64_t)*oval;

	(void)arg;	/* unused; aggregator callbacks share one signature */

	if ((int64_t)nval < cur)
		*oval = nval;
}
|
---|
1935 |
|
---|
/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	/*
	 * Keep the signed maximum of the values seen so far: max() compares
	 * as int64_t, mirroring dtrace_aggregate_min().
	 */
	int64_t cur = (int64_t)*oval;

	(void)arg;	/* unused; aggregator callbacks share one signature */

	if ((int64_t)nval > cur)
		*oval = nval;
}
|
---|
1944 |
|
---|
1945 | static void
|
---|
1946 | dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
|
---|
1947 | {
|
---|
1948 | int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
|
---|
1949 | int64_t val = (int64_t)nval;
|
---|
1950 |
|
---|
1951 | if (val < 0) {
|
---|
1952 | for (i = 0; i < zero; i++) {
|
---|
1953 | if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
|
---|
1954 | quanta[i] += incr;
|
---|
1955 | return;
|
---|
1956 | }
|
---|
1957 | }
|
---|
1958 | } else {
|
---|
1959 | for (i = zero + 1; i < VBDTCAST(int)DTRACE_QUANTIZE_NBUCKETS; i++) {
|
---|
1960 | if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
|
---|
1961 | quanta[i - 1] += incr;
|
---|
1962 | return;
|
---|
1963 | }
|
---|
1964 | }
|
---|
1965 |
|
---|
1966 | quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
|
---|
1967 | return;
|
---|
1968 | }
|
---|
1969 |
|
---|
1970 | #ifndef VBOX
|
---|
1971 | ASSERT(0);
|
---|
1972 | #else
|
---|
1973 | AssertFatalFailed();
|
---|
1974 | #endif
|
---|
1975 | }
|
---|
1976 |
|
---|
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	/*
	 * The first word of the lquantize() data is the encoded argument
	 * (base/step/levels); the buckets follow it.
	 */
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * Underflow: bucket 0 catches everything below base.
		 */
		lquanta[0] += incr;
	} else if ((level = (val - base) / step) < levels) {
		/*
		 * In range: buckets 1 through levels hold the linear steps.
		 */
		lquanta[level + 1] += incr;
	} else {
		/*
		 * Overflow: the final bucket catches everything at or above
		 * the top of the configured range.
		 */
		lquanta[levels + 1] += incr;
	}
}
|
---|
2009 |
|
---|
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
	(void)arg;	/* unused; aggregator callbacks share one signature */

	/*
	 * avg() keeps a (count, sum) pair; the consumer divides at
	 * presentation time.
	 */
	data[0] += 1;
	data[1] += nval;
}
|
---|
2018 |
|
---|
/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
	int64_t sval = (int64_t)nval;
	uint64_t sq[2];

	(void)arg;	/* unused; aggregator callbacks share one signature */

	data[0]++;		/* sample count */
	data[1] += nval;	/* running sum */

	/*
	 * What we want to say here is:
	 *
	 *	data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic (the sum of squares lives in
	 * data[2..3]).  Squaring |sval| yields the same product as
	 * squaring sval itself.
	 */
	if (sval < 0)
		sval = -sval;

	dtrace_multiply_128((uint64_t)sval, (uint64_t)sval, sq);
	dtrace_add_128(data + 2, sq, data + 2);
}
|
---|
2044 |
|
---|
/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	(void)arg;	/* unused; aggregator callbacks share one signature */
	(void)nval;	/* count() ignores the sampled value itself */

	(*oval)++;
}
|
---|
2053 |
|
---|
/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	(void)arg;	/* unused; aggregator callbacks share one signature */

	/* sum() simply accumulates, wrapping modulo 2^64. */
	*oval = *oval + nval;
}
|
---|
2061 |
|
---|
/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 *
 * Parameters:
 *	agg	- the aggregation descriptor (key layout, aggregator, id)
 *	dbuf	- the principal buffer holding the already-recorded tuple
 *	offset	- offset of this record within dbuf's tomax
 *	buf	- the per-CPU aggregation buffer to update (may be NULL)
 *	expr	- the value being aggregated
 *	arg	- aggregator argument (defaults to 1 if the action has none)
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics: an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	/* size covers the key (including the aggid); fsize adds the value. */
	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * First use of this buffer: lay out the hash table just
		 * below the metastructure.  We just kludge up approximately
		 * 1/8th of the size to be buckets.  If this guess ends up
		 * being routinely off-the-mark, we may need to dynamically
		 * readjust this based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get a an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			/* For strings, only hash up to the terminator. */
			if (isstr && data[i] == '\0')
				break;
		}
	}

	/* Final avalanche of the one-at-a-time hash. */
	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	/*
	 * Search the hash chain for an existing entry with the same key.
	 */
	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		/* Hash matched; now compare the key data byte-by-byte. */
		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit: we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	/* Round offs up to 8-byte alignment (align is sizeof(uint64_t)-1). */
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 * (Data grows up from offs; keys grow down from dtagb_free.)
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*CONSTCOND*/
	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.  The aggid is stored first, followed by
	 * the raw key bytes from the principal buffer.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		/* Once the NUL is seen, zero the remainder of the field. */
		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	/* Zero-fill the value portion beyond the key. */
	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
|
---|
2318 |
|
---|
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 *
 * Returns the 1-based speculation identifier on success, 0 on failure.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			/*
			 * This slot is taken.  If it is in one of the busy
			 * transitional states, remember to attribute any
			 * eventual failure to "busy" rather than "unavail".
			 */
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;
			i++;
			continue;
		}

		/*
		 * Try to atomically claim this INACTIVE slot.  On CAS
		 * failure (another CPU raced us), retry the same slot
		 * without advancing i.
		 */
		if ( (dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, DTRACESPEC_ACTIVE)
		    == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		/* CAS loop: lock-free increment of the chosen counter. */
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
|
---|
2362 |
|
---|
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new VBDTUNASS(-1);
	intptr_t offs;

	/* Speculation ID 0 denotes "no speculation"; nothing to commit. */
	if (which == 0)
		return;

	/* An out-of-range ID is broken DIF; flag an illegal operation. */
	if (which > VBDTCAST(unsigned)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	/* IDs are 1-based; pick this CPU's speculative and principal buffers. */
	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	/*
	 * CAS loop: transition the speculation according to the state we
	 * observe, retrying whenever another CPU races a transition in
	 * between our read and our compare-and-swap.
	 */
	do {
		current = spec->dtsp_state;

		/*
		 * COMMITTINGMANY needs no further state transition -- we just
		 * join the ongoing multi-CPU commit (the CAS below is skipped
		 * along with the rest of the loop body).
		 */
		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			RT_FALL_THRU();

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
#ifndef VBOX
			ASSERT(0);
#else
			AssertFatalMsgFailed(("%d\n", current));
#endif
		}
	} while ((dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		/* No room in the principal buffer: count a drop, then clean up. */
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly subobtimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING); NOREF(rval);
	}

	/* Reset this CPU's speculative buffer, folding its drops into xamot. */
	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
|
---|
2503 |
|
---|
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	/* Speculation ID 0 denotes "no speculation"; nothing to discard. */
	if (which == 0)
		return;

	/* An out-of-range ID is broken DIF; flag an illegal operation. */
	if (which > VBDTCAST(unsigned)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	/* IDs are 1-based; pick this CPU's buffer for the speculation. */
	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	/*
	 * CAS loop: retry the state transition whenever another CPU races a
	 * conflicting transition between our read and our compare-and-swap.
	 */
	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			/* Other CPUs may hold data; they must clean asynchronously. */
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * Non-zero offset here means the one active CPU is us,
			 * so we can transition straight back to INACTIVE;
			 * otherwise some other CPU holds the data and must be
			 * asynchronously cleaned, hence DISCARDING.
			 */
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
#ifndef VBOX
			ASSERT(0);
#else
			AssertFatalMsgFailed(("%d\n", current));
#endif
		}
	} while ((dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current);

	/* Drop whatever data this CPU had buffered for the speculation. */
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}
|
---|
2564 |
|
---|
2565 | /*
|
---|
2566 | * Note: not called from probe context. This function is called
|
---|
2567 | * asynchronously from cross call context to clean any speculations that are
|
---|
2568 | * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
|
---|
2569 | * transitioned back to the INACTIVE state until all CPUs have cleaned the
|
---|
2570 | * speculation.
|
---|
2571 | */
|
---|
2572 | static void
|
---|
2573 | dtrace_speculation_clean_here(dtrace_state_t *state)
|
---|
2574 | {
|
---|
2575 | dtrace_icookie_t cookie;
|
---|
2576 | processorid_t cpu = VBDT_GET_CPUID();
|
---|
2577 | dtrace_buffer_t *dest = &state->dts_buffer[cpu];
|
---|
2578 | dtrace_specid_t i;
|
---|
2579 |
|
---|
2580 | cookie = dtrace_interrupt_disable();
|
---|
2581 |
|
---|
2582 | if (dest->dtb_tomax == NULL) {
|
---|
2583 | dtrace_interrupt_enable(cookie);
|
---|
2584 | return;
|
---|
2585 | }
|
---|
2586 |
|
---|
2587 | for (i = 0; i < VBDTCAST(unsigned)state->dts_nspeculations; i++) {
|
---|
2588 | dtrace_speculation_t *spec = &state->dts_speculations[i];
|
---|
2589 | dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
|
---|
2590 |
|
---|
2591 | if (src->dtb_tomax == NULL)
|
---|
2592 | continue;
|
---|
2593 |
|
---|
2594 | if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
|
---|
2595 | src->dtb_offset = 0;
|
---|
2596 | continue;
|
---|
2597 | }
|
---|
2598 |
|
---|
2599 | if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
|
---|
2600 | continue;
|
---|
2601 |
|
---|
2602 | if (src->dtb_offset == 0)
|
---|
2603 | continue;
|
---|
2604 |
|
---|
2605 | dtrace_speculation_commit(state, cpu, i + 1);
|
---|
2606 | }
|
---|
2607 |
|
---|
2608 | dtrace_interrupt_enable(cookie);
|
---|
2609 | }
|
---|
2610 |
|
---|
#ifdef VBOX
/** RTMpOnAll callback: run dtrace_speculation_clean_here() on each CPU. */
static DECLCALLBACK(void) dtrace_speculation_clean_here_wrapper(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
	NOREF(idCpu);
	NOREF(pvUser2);
	dtrace_speculation_clean_here((dtrace_state_t *)pvUser1);
}
#endif
|
---|
2619 |
|
---|
/*
 * Note: not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITMANY and DISCARDING speculations may not be transitioned back to the
 * INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	/*
	 * Pass 1: mark every speculation awaiting a multi-CPU commit or a
	 * discard.  NOTE(review): the ASSERT suggests dtsp_cleaning is owned
	 * exclusively by this routine -- confirm against the rest of the file.
	 */
	for (i = 0; i < VBDTCAST(unsigned)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	/* Cross call: have every CPU commit/discard its speculative data. */
#ifndef VBOX
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);
#else
	RTMpOnAll(dtrace_speculation_clean_here_wrapper, state, NULL);
#endif

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < VBDTCAST(unsigned)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		/* Atomic transition back to INACTIVE; the CAS must not lose. */
		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(VBDTCAST(dtrace_speculation_state_t)rv == current);
		spec->dtsp_cleaning = 0;
	}
}
|
---|
2680 |
|
---|
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new VBDTUNASS(-1);
	dtrace_buffer_t *buf;

	/* Speculation ID 0 denotes "no speculation". */
	if (which == 0)
		return (NULL);

	/* An out-of-range ID is broken DIF; flag an illegal operation. */
	if (which > VBDTCAST(unsigned)state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	/* IDs are 1-based; pick this CPU's buffer for the speculation. */
	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	/*
	 * CAS loop: retry the state transition whenever another CPU races a
	 * conflicting transition between our read and our compare-and-swap.
	 */
	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			/* A commit is racing us; our buffer must be empty. */
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			/* First speculate(): claim the speculation for one CPU. */
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
#ifndef VBOX
			ASSERT(0);
#else
			AssertFatalMsgFailed(("%d\n", current));
#endif
		}
	} while ((dtrace_speculation_state_t)dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
|
---|
2754 |
|
---|
2755 | /*
|
---|
2756 | * Return a string. In the event that the user lacks the privilege to access
|
---|
2757 | * arbitrary kernel memory, we copy the string out to scratch memory so that we
|
---|
2758 | * don't fail access checking.
|
---|
2759 | *
|
---|
2760 | * dtrace_dif_variable() uses this routine as a helper for various
|
---|
2761 | * builtin values such as 'execname' and 'probefunc.'
|
---|
2762 | */
|
---|
2763 | VBDTSTATIC uintptr_t
|
---|
2764 | dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
|
---|
2765 | dtrace_mstate_t *mstate)
|
---|
2766 | {
|
---|
2767 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
2768 | uintptr_t ret;
|
---|
2769 | size_t strsz;
|
---|
2770 |
|
---|
2771 | /*
|
---|
2772 | * The easy case: this probe is allowed to read all of memory, so
|
---|
2773 | * we can just return this as a vanilla pointer.
|
---|
2774 | */
|
---|
2775 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
|
---|
2776 | return (addr);
|
---|
2777 |
|
---|
2778 | /*
|
---|
2779 | * This is the tougher case: we copy the string in question from
|
---|
2780 | * kernel memory into scratch memory and return it that way: this
|
---|
2781 | * ensures that we won't trip up when access checking tests the
|
---|
2782 | * BYREF return value.
|
---|
2783 | */
|
---|
2784 | strsz = dtrace_strlen((char *)addr, size) + 1;
|
---|
2785 |
|
---|
2786 | if (mstate->dtms_scratch_ptr + strsz >
|
---|
2787 | mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
|
---|
2788 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
2789 | return (NULL);
|
---|
2790 | }
|
---|
2791 |
|
---|
2792 | dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
|
---|
2793 | strsz);
|
---|
2794 | ret = mstate->dtms_scratch_ptr;
|
---|
2795 | mstate->dtms_scratch_ptr += strsz;
|
---|
2796 | return (ret);
|
---|
2797 | }
|
---|
2798 |
|
---|
2799 | /*
|
---|
2800 | * This function implements the DIF emulator's variable lookups. The emulator
|
---|
2801 | * passes a reserved variable identifier and optional built-in array index.
|
---|
2802 | */
|
---|
2803 | static uint64_t
|
---|
2804 | dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
|
---|
2805 | uint64_t ndx)
|
---|
2806 | {
|
---|
2807 | /*
|
---|
2808 | * If we're accessing one of the uncached arguments, we'll turn this
|
---|
2809 | * into a reference in the args array.
|
---|
2810 | */
|
---|
2811 | if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
|
---|
2812 | ndx = v - DIF_VAR_ARG0;
|
---|
2813 | v = DIF_VAR_ARGS;
|
---|
2814 | }
|
---|
2815 |
|
---|
2816 | switch (v) {
|
---|
2817 | case DIF_VAR_ARGS:
|
---|
2818 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
|
---|
2819 | if (ndx >= sizeof (mstate->dtms_arg) /
|
---|
2820 | sizeof (mstate->dtms_arg[0])) {
|
---|
2821 | int aframes = mstate->dtms_probe->dtpr_aframes + 2;
|
---|
2822 | dtrace_provider_t *pv;
|
---|
2823 | uint64_t val;
|
---|
2824 |
|
---|
2825 | pv = mstate->dtms_probe->dtpr_provider;
|
---|
2826 | if (pv->dtpv_pops.dtps_getargval != NULL)
|
---|
2827 | val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
|
---|
2828 | mstate->dtms_probe->dtpr_id,
|
---|
2829 | mstate->dtms_probe->dtpr_arg, ndx, aframes);
|
---|
2830 | else
|
---|
2831 | val = dtrace_getarg(ndx, aframes);
|
---|
2832 |
|
---|
2833 | /*
|
---|
2834 | * This is regrettably required to keep the compiler
|
---|
2835 | * from tail-optimizing the call to dtrace_getarg().
|
---|
2836 | * The condition always evaluates to true, but the
|
---|
2837 | * compiler has no way of figuring that out a priori.
|
---|
2838 | * (None of this would be necessary if the compiler
|
---|
2839 | * could be relied upon to _always_ tail-optimize
|
---|
2840 | * the call to dtrace_getarg() -- but it can't.)
|
---|
2841 | */
|
---|
2842 | if (mstate->dtms_probe != NULL)
|
---|
2843 | return (val);
|
---|
2844 |
|
---|
2845 | #ifndef VBOX
|
---|
2846 | ASSERT(0);
|
---|
2847 | #else
|
---|
2848 | AssertFatalFailed();
|
---|
2849 | #endif
|
---|
2850 | }
|
---|
2851 |
|
---|
2852 | return (mstate->dtms_arg[ndx]);
|
---|
2853 |
|
---|
2854 | case DIF_VAR_UREGS: {
|
---|
2855 | #ifndef VBOX
|
---|
2856 | klwp_t *lwp;
|
---|
2857 |
|
---|
2858 | if (!dtrace_priv_proc(state))
|
---|
2859 | return (0);
|
---|
2860 |
|
---|
2861 | if ((lwp = curthread->t_lwp) == NULL) {
|
---|
2862 | DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
|
---|
2863 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval = NULL;
|
---|
2864 | return (0);
|
---|
2865 | }
|
---|
2866 |
|
---|
2867 | return (dtrace_getreg(lwp->lwp_regs, ndx));
|
---|
2868 | #else
|
---|
2869 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2870 | return (0);
|
---|
2871 | #endif
|
---|
2872 | }
|
---|
2873 |
|
---|
2874 | case DIF_VAR_CURTHREAD:
|
---|
2875 | if (!dtrace_priv_kernel(state))
|
---|
2876 | return (0);
|
---|
2877 | #ifndef VBOX
|
---|
2878 | return ((uint64_t)(uintptr_t)curthread);
|
---|
2879 | #else
|
---|
2880 | return ((uintptr_t)RTThreadNativeSelf());
|
---|
2881 | #endif
|
---|
2882 |
|
---|
2883 | case DIF_VAR_TIMESTAMP:
|
---|
2884 | if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
|
---|
2885 | mstate->dtms_timestamp = dtrace_gethrtime();
|
---|
2886 | mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
|
---|
2887 | }
|
---|
2888 | return (mstate->dtms_timestamp);
|
---|
2889 |
|
---|
2890 | case DIF_VAR_VTIMESTAMP:
|
---|
2891 | #ifndef VBOX
|
---|
2892 | ASSERT(dtrace_vtime_references != 0);
|
---|
2893 | return (curthread->t_dtrace_vtime);
|
---|
2894 | #else
|
---|
2895 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
2896 | return (0);
|
---|
2897 | #endif
|
---|
2898 |
|
---|
2899 | case DIF_VAR_WALLTIMESTAMP:
|
---|
2900 | if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
|
---|
2901 | mstate->dtms_walltimestamp = dtrace_gethrestime();
|
---|
2902 | mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
|
---|
2903 | }
|
---|
2904 | return (mstate->dtms_walltimestamp);
|
---|
2905 |
|
---|
2906 | case DIF_VAR_IPL:
|
---|
2907 | if (!dtrace_priv_kernel(state))
|
---|
2908 | return (0);
|
---|
2909 | if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
|
---|
2910 | mstate->dtms_ipl = dtrace_getipl();
|
---|
2911 | mstate->dtms_present |= DTRACE_MSTATE_IPL;
|
---|
2912 | }
|
---|
2913 | return (mstate->dtms_ipl);
|
---|
2914 |
|
---|
2915 | case DIF_VAR_EPID:
|
---|
2916 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
|
---|
2917 | return (mstate->dtms_epid);
|
---|
2918 |
|
---|
2919 | case DIF_VAR_ID:
|
---|
2920 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
2921 | return (mstate->dtms_probe->dtpr_id);
|
---|
2922 |
|
---|
2923 | case DIF_VAR_STACKDEPTH:
|
---|
2924 | if (!dtrace_priv_kernel(state))
|
---|
2925 | return (0);
|
---|
2926 | if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
|
---|
2927 | int aframes = mstate->dtms_probe->dtpr_aframes + 2;
|
---|
2928 |
|
---|
2929 | mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
|
---|
2930 | mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
|
---|
2931 | }
|
---|
2932 | return (mstate->dtms_stackdepth);
|
---|
2933 |
|
---|
2934 | case DIF_VAR_USTACKDEPTH:
|
---|
2935 | if (!dtrace_priv_proc(state))
|
---|
2936 | return (0);
|
---|
2937 | if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
|
---|
2938 | /*
|
---|
2939 | * See comment in DIF_VAR_PID.
|
---|
2940 | */
|
---|
2941 | if (DTRACE_ANCHORED(mstate->dtms_probe) &&
|
---|
2942 | CPU_ON_INTR(CPU)) {
|
---|
2943 | mstate->dtms_ustackdepth = 0;
|
---|
2944 | } else {
|
---|
2945 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
2946 | mstate->dtms_ustackdepth =
|
---|
2947 | dtrace_getustackdepth();
|
---|
2948 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
2949 | }
|
---|
2950 | mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
|
---|
2951 | }
|
---|
2952 | return (mstate->dtms_ustackdepth);
|
---|
2953 |
|
---|
2954 | case DIF_VAR_CALLER:
|
---|
2955 | if (!dtrace_priv_kernel(state))
|
---|
2956 | return (0);
|
---|
2957 | if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
|
---|
2958 | int aframes = mstate->dtms_probe->dtpr_aframes + 2;
|
---|
2959 |
|
---|
2960 | if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
|
---|
2961 | /*
|
---|
2962 | * If this is an unanchored probe, we are
|
---|
2963 | * required to go through the slow path:
|
---|
2964 | * dtrace_caller() only guarantees correct
|
---|
2965 | * results for anchored probes.
|
---|
2966 | */
|
---|
2967 | pc_t caller[2];
|
---|
2968 |
|
---|
2969 | dtrace_getpcstack(caller, 2, aframes,
|
---|
2970 | (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
|
---|
2971 | mstate->dtms_caller = caller[1];
|
---|
2972 | } else if ((mstate->dtms_caller =
|
---|
2973 | dtrace_caller(aframes)) == VBDTCAST(uintptr_t)-1) {
|
---|
2974 | /*
|
---|
2975 | * We have failed to do this the quick way;
|
---|
2976 | * we must resort to the slower approach of
|
---|
2977 | * calling dtrace_getpcstack().
|
---|
2978 | */
|
---|
2979 | pc_t caller;
|
---|
2980 |
|
---|
2981 | dtrace_getpcstack(&caller, 1, aframes, NULL);
|
---|
2982 | mstate->dtms_caller = caller;
|
---|
2983 | }
|
---|
2984 |
|
---|
2985 | mstate->dtms_present |= DTRACE_MSTATE_CALLER;
|
---|
2986 | }
|
---|
2987 | return (mstate->dtms_caller);
|
---|
2988 |
|
---|
2989 | case DIF_VAR_UCALLER:
|
---|
2990 | if (!dtrace_priv_proc(state))
|
---|
2991 | return (0);
|
---|
2992 |
|
---|
2993 | if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
|
---|
2994 | uint64_t ustack[3];
|
---|
2995 |
|
---|
2996 | /*
|
---|
2997 | * dtrace_getupcstack() fills in the first uint64_t
|
---|
2998 | * with the current PID. The second uint64_t will
|
---|
2999 | * be the program counter at user-level. The third
|
---|
3000 | * uint64_t will contain the caller, which is what
|
---|
3001 | * we're after.
|
---|
3002 | */
|
---|
3003 | ustack[2] = NULL;
|
---|
3004 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3005 | dtrace_getupcstack(ustack, 3);
|
---|
3006 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3007 | mstate->dtms_ucaller = ustack[2];
|
---|
3008 | mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
|
---|
3009 | }
|
---|
3010 |
|
---|
3011 | return (mstate->dtms_ucaller);
|
---|
3012 |
|
---|
3013 | case DIF_VAR_PROBEPROV:
|
---|
3014 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3015 | return (dtrace_dif_varstr(
|
---|
3016 | (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
|
---|
3017 | state, mstate));
|
---|
3018 |
|
---|
3019 | case DIF_VAR_PROBEMOD:
|
---|
3020 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3021 | return (dtrace_dif_varstr(
|
---|
3022 | (uintptr_t)mstate->dtms_probe->dtpr_mod,
|
---|
3023 | state, mstate));
|
---|
3024 |
|
---|
3025 | case DIF_VAR_PROBEFUNC:
|
---|
3026 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3027 | return (dtrace_dif_varstr(
|
---|
3028 | (uintptr_t)mstate->dtms_probe->dtpr_func,
|
---|
3029 | state, mstate));
|
---|
3030 |
|
---|
3031 | case DIF_VAR_PROBENAME:
|
---|
3032 | ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
|
---|
3033 | return (dtrace_dif_varstr(
|
---|
3034 | (uintptr_t)mstate->dtms_probe->dtpr_name,
|
---|
3035 | state, mstate));
|
---|
3036 |
|
---|
3037 | case DIF_VAR_PID:
|
---|
3038 | if (!dtrace_priv_proc(state))
|
---|
3039 | return (0);
|
---|
3040 |
|
---|
3041 | #ifndef VBOX
|
---|
3042 | /*
|
---|
3043 | * Note that we are assuming that an unanchored probe is
|
---|
3044 | * always due to a high-level interrupt. (And we're assuming
|
---|
3045 | * that there is only a single high level interrupt.)
|
---|
3046 | */
|
---|
3047 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3048 | return (pid0.pid_id);
|
---|
3049 |
|
---|
3050 | /*
|
---|
3051 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3052 | * it always points to a valid, allocated proc structure.
|
---|
3053 | * Further, it is always safe to dereference the p_pidp member
|
---|
3054 | * of one's own proc structure. (These are truisms becuase
|
---|
3055 | * threads and processes don't clean up their own state --
|
---|
3056 | * they leave that task to whomever reaps them.)
|
---|
3057 | */
|
---|
3058 | return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
|
---|
3059 | #else
|
---|
3060 | return (RTProcSelf());
|
---|
3061 | #endif
|
---|
3062 |
|
---|
3063 | case DIF_VAR_PPID:
|
---|
3064 | if (!dtrace_priv_proc(state))
|
---|
3065 | return (0);
|
---|
3066 |
|
---|
3067 | #ifndef VBOX
|
---|
3068 | /*
|
---|
3069 | * See comment in DIF_VAR_PID.
|
---|
3070 | */
|
---|
3071 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3072 | return (pid0.pid_id);
|
---|
3073 |
|
---|
3074 | /*
|
---|
3075 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3076 | * it always points to a valid, allocated proc structure.
|
---|
3077 | * (This is true because threads don't clean up their own
|
---|
3078 | * state -- they leave that task to whomever reaps them.)
|
---|
3079 | */
|
---|
3080 | return ((uint64_t)curthread->t_procp->p_ppid);
|
---|
3081 | #else
|
---|
3082 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3083 | return (0); /** @todo parent pid? */
|
---|
3084 | #endif
|
---|
3085 |
|
---|
3086 | case DIF_VAR_TID:
|
---|
3087 | #ifndef VBOX
|
---|
3088 | /*
|
---|
3089 | * See comment in DIF_VAR_PID.
|
---|
3090 | */
|
---|
3091 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3092 | return (0);
|
---|
3093 |
|
---|
3094 | return ((uint64_t)curthread->t_tid);
|
---|
3095 | #else
|
---|
3096 | return (RTThreadNativeSelf()); /** @todo proper tid? */
|
---|
3097 | #endif
|
---|
3098 |
|
---|
3099 | case DIF_VAR_EXECNAME:
|
---|
3100 | if (!dtrace_priv_proc(state))
|
---|
3101 | return (0);
|
---|
3102 |
|
---|
3103 | #ifndef VBOX
|
---|
3104 | /*
|
---|
3105 | * See comment in DIF_VAR_PID.
|
---|
3106 | */
|
---|
3107 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3108 | return ((uint64_t)(uintptr_t)p0.p_user.u_comm);
|
---|
3109 |
|
---|
3110 | /*
|
---|
3111 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3112 | * it always points to a valid, allocated proc structure.
|
---|
3113 | * (This is true because threads don't clean up their own
|
---|
3114 | * state -- they leave that task to whomever reaps them.)
|
---|
3115 | */
|
---|
3116 | return (dtrace_dif_varstr(
|
---|
3117 | (uintptr_t)curthread->t_procp->p_user.u_comm,
|
---|
3118 | state, mstate));
|
---|
3119 | #else
|
---|
3120 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3121 | return (0); /** @todo execname */
|
---|
3122 | #endif
|
---|
3123 |
|
---|
3124 | case DIF_VAR_ZONENAME:
|
---|
3125 | if (!dtrace_priv_proc(state))
|
---|
3126 | return (0);
|
---|
3127 |
|
---|
3128 | #ifndef VBOX
|
---|
3129 | /*
|
---|
3130 | * See comment in DIF_VAR_PID.
|
---|
3131 | */
|
---|
3132 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3133 | return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);
|
---|
3134 |
|
---|
3135 | /*
|
---|
3136 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3137 | * it always points to a valid, allocated proc structure.
|
---|
3138 | * (This is true because threads don't clean up their own
|
---|
3139 | * state -- they leave that task to whomever reaps them.)
|
---|
3140 | */
|
---|
3141 | return (dtrace_dif_varstr(
|
---|
3142 | (uintptr_t)curthread->t_procp->p_zone->zone_name,
|
---|
3143 | state, mstate));
|
---|
3144 | #else
|
---|
3145 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3146 | return (0);
|
---|
3147 | #endif
|
---|
3148 |
|
---|
3149 | case DIF_VAR_UID:
|
---|
3150 | if (!dtrace_priv_proc(state))
|
---|
3151 | return (0);
|
---|
3152 |
|
---|
3153 | #ifndef VBOX
|
---|
3154 | /*
|
---|
3155 | * See comment in DIF_VAR_PID.
|
---|
3156 | */
|
---|
3157 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3158 | return ((uint64_t)p0.p_cred->cr_uid);
|
---|
3159 |
|
---|
3160 | /*
|
---|
3161 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3162 | * it always points to a valid, allocated proc structure.
|
---|
3163 | * (This is true because threads don't clean up their own
|
---|
3164 | * state -- they leave that task to whomever reaps them.)
|
---|
3165 | *
|
---|
3166 | * Additionally, it is safe to dereference one's own process
|
---|
3167 | * credential, since this is never NULL after process birth.
|
---|
3168 | */
|
---|
3169 | return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
|
---|
3170 | #else
|
---|
3171 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3172 | return (0);
|
---|
3173 | #endif
|
---|
3174 |
|
---|
3175 | case DIF_VAR_GID:
|
---|
3176 | if (!dtrace_priv_proc(state))
|
---|
3177 | return (0);
|
---|
3178 |
|
---|
3179 | #ifndef VBOX
|
---|
3180 | /*
|
---|
3181 | * See comment in DIF_VAR_PID.
|
---|
3182 | */
|
---|
3183 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3184 | return ((uint64_t)p0.p_cred->cr_gid);
|
---|
3185 |
|
---|
3186 | /*
|
---|
3187 | * It is always safe to dereference one's own t_procp pointer:
|
---|
3188 | * it always points to a valid, allocated proc structure.
|
---|
3189 | * (This is true because threads don't clean up their own
|
---|
3190 | * state -- they leave that task to whomever reaps them.)
|
---|
3191 | *
|
---|
3192 | * Additionally, it is safe to dereference one's own process
|
---|
3193 | * credential, since this is never NULL after process birth.
|
---|
3194 | */
|
---|
3195 | return ((uint64_t)curthread->t_procp->p_cred->cr_gid);
|
---|
3196 | #else
|
---|
3197 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3198 | return (0);
|
---|
3199 | #endif
|
---|
3200 |
|
---|
3201 | case DIF_VAR_ERRNO: {
|
---|
3202 | #ifndef VBOX
|
---|
3203 | klwp_t *lwp;
|
---|
3204 | #endif
|
---|
3205 | if (!dtrace_priv_proc(state))
|
---|
3206 | return (0);
|
---|
3207 |
|
---|
3208 | #ifndef VBOX
|
---|
3209 | /*
|
---|
3210 | * See comment in DIF_VAR_PID.
|
---|
3211 | */
|
---|
3212 | if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
|
---|
3213 | return (0);
|
---|
3214 |
|
---|
3215 | /*
|
---|
3216 | * It is always safe to dereference one's own t_lwp pointer in
|
---|
3217 | * the event that this pointer is non-NULL. (This is true
|
---|
3218 | * because threads and lwps don't clean up their own state --
|
---|
3219 | * they leave that task to whomever reaps them.)
|
---|
3220 | */
|
---|
3221 | if ((lwp = curthread->t_lwp) == NULL)
|
---|
3222 | return (0);
|
---|
3223 |
|
---|
3224 | return ((uint64_t)lwp->lwp_errno);
|
---|
3225 | #else
|
---|
3226 | cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
|
---|
3227 | return (0);
|
---|
3228 | #endif
|
---|
3229 | }
|
---|
3230 | default:
|
---|
3231 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3232 | return (0);
|
---|
3233 | }
|
---|
3234 | }
|
---|
3235 |
|
---|
3236 | /*
|
---|
3237 | * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
|
---|
3238 | * Notice that we don't bother validating the proper number of arguments or
|
---|
3239 | * their types in the tuple stack. This isn't needed because all argument
|
---|
3240 | * interpretation is safe because of our load safety -- the worst that can
|
---|
3241 | * happen is that a bogus program can obtain bogus results.
|
---|
3242 | */
|
---|
3243 | static void
|
---|
3244 | dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
|
---|
3245 | dtrace_key_t *tupregs, int nargs,
|
---|
3246 | dtrace_mstate_t *mstate, dtrace_state_t *state)
|
---|
3247 | {
|
---|
3248 | volatile uint16_t *flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
3249 | volatile uintptr_t *illval = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;
|
---|
3250 | dtrace_vstate_t *vstate = &state->dts_vstate;
|
---|
3251 |
|
---|
3252 | #ifndef VBOX
|
---|
3253 | union {
|
---|
3254 | mutex_impl_t mi;
|
---|
3255 | uint64_t mx;
|
---|
3256 | } m;
|
---|
3257 |
|
---|
3258 | union {
|
---|
3259 | krwlock_t ri;
|
---|
3260 | uintptr_t rw;
|
---|
3261 | } r;
|
---|
3262 | #endif
|
---|
3263 |
|
---|
3264 | switch (subr) {
|
---|
3265 | case DIF_SUBR_RAND:
|
---|
3266 | regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
|
---|
3267 | break;
|
---|
3268 |
|
---|
3269 | case DIF_SUBR_MUTEX_OWNED:
|
---|
3270 | #ifndef VBOX
|
---|
3271 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3272 | mstate, vstate)) {
|
---|
3273 | regs[rd] = NULL;
|
---|
3274 | break;
|
---|
3275 | }
|
---|
3276 |
|
---|
3277 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3278 | if (MUTEX_TYPE_ADAPTIVE(&m.mi))
|
---|
3279 | regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
|
---|
3280 | else
|
---|
3281 | regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
|
---|
3282 | #else
|
---|
3283 | regs[rd] = 0;
|
---|
3284 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3285 | #endif
|
---|
3286 | break;
|
---|
3287 |
|
---|
3288 | case DIF_SUBR_MUTEX_OWNER:
|
---|
3289 | #ifndef VBOX
|
---|
3290 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3291 | mstate, vstate)) {
|
---|
3292 | regs[rd] = NULL;
|
---|
3293 | break;
|
---|
3294 | }
|
---|
3295 |
|
---|
3296 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3297 | if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
|
---|
3298 | MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
|
---|
3299 | regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
|
---|
3300 | else
|
---|
3301 | regs[rd] = 0;
|
---|
3302 | #else
|
---|
3303 | regs[rd] = 0;
|
---|
3304 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3305 | #endif
|
---|
3306 | break;
|
---|
3307 |
|
---|
3308 | case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
|
---|
3309 | #ifndef VBOX
|
---|
3310 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3311 | mstate, vstate)) {
|
---|
3312 | regs[rd] = NULL;
|
---|
3313 | break;
|
---|
3314 | }
|
---|
3315 |
|
---|
3316 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3317 | regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
|
---|
3318 | #else
|
---|
3319 | regs[rd] = 0;
|
---|
3320 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3321 | #endif
|
---|
3322 | break;
|
---|
3323 |
|
---|
3324 | case DIF_SUBR_MUTEX_TYPE_SPIN:
|
---|
3325 | #ifndef VBOX
|
---|
3326 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
|
---|
3327 | mstate, vstate)) {
|
---|
3328 | regs[rd] = NULL;
|
---|
3329 | break;
|
---|
3330 | }
|
---|
3331 |
|
---|
3332 | m.mx = dtrace_load64(tupregs[0].dttk_value);
|
---|
3333 | regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
|
---|
3334 | #else
|
---|
3335 | regs[rd] = 0;
|
---|
3336 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3337 | #endif
|
---|
3338 | break;
|
---|
3339 |
|
---|
3340 | case DIF_SUBR_RW_READ_HELD: {
|
---|
3341 | #ifndef VBOX
|
---|
3342 | uintptr_t tmp;
|
---|
3343 |
|
---|
3344 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
|
---|
3345 | mstate, vstate)) {
|
---|
3346 | regs[rd] = NULL;
|
---|
3347 | break;
|
---|
3348 | }
|
---|
3349 |
|
---|
3350 | r.rw = dtrace_loadptr(tupregs[0].dttk_value);
|
---|
3351 | regs[rd] = _RW_READ_HELD(&r.ri, tmp);
|
---|
3352 | #else
|
---|
3353 | regs[rd] = 0;
|
---|
3354 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3355 | #endif
|
---|
3356 | break;
|
---|
3357 | }
|
---|
3358 |
|
---|
3359 | case DIF_SUBR_RW_WRITE_HELD:
|
---|
3360 | #ifndef VBOX
|
---|
3361 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
|
---|
3362 | mstate, vstate)) {
|
---|
3363 | regs[rd] = NULL;
|
---|
3364 | break;
|
---|
3365 | }
|
---|
3366 |
|
---|
3367 | r.rw = dtrace_loadptr(tupregs[0].dttk_value);
|
---|
3368 | regs[rd] = _RW_WRITE_HELD(&r.ri);
|
---|
3369 | #else
|
---|
3370 | regs[rd] = 0;
|
---|
3371 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3372 | #endif
|
---|
3373 | break;
|
---|
3374 |
|
---|
3375 | case DIF_SUBR_RW_ISWRITER:
|
---|
3376 | #ifndef VBOX
|
---|
3377 | if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
|
---|
3378 | mstate, vstate)) {
|
---|
3379 | regs[rd] = NULL;
|
---|
3380 | break;
|
---|
3381 | }
|
---|
3382 |
|
---|
3383 | r.rw = dtrace_loadptr(tupregs[0].dttk_value);
|
---|
3384 | regs[rd] = _RW_ISWRITER(&r.ri);
|
---|
3385 | #else
|
---|
3386 | regs[rd] = 0;
|
---|
3387 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3388 | #endif
|
---|
3389 | break;
|
---|
3390 |
|
---|
3391 | case DIF_SUBR_BCOPY: {
|
---|
3392 | /*
|
---|
3393 | * We need to be sure that the destination is in the scratch
|
---|
3394 | * region -- no other region is allowed.
|
---|
3395 | */
|
---|
3396 | uintptr_t src = tupregs[0].dttk_value;
|
---|
3397 | uintptr_t dest = tupregs[1].dttk_value;
|
---|
3398 | size_t size = tupregs[2].dttk_value;
|
---|
3399 |
|
---|
3400 | if (!dtrace_inscratch(dest, size, mstate)) {
|
---|
3401 | *flags |= CPU_DTRACE_BADADDR;
|
---|
3402 | *illval = regs[rd];
|
---|
3403 | break;
|
---|
3404 | }
|
---|
3405 |
|
---|
3406 | if (!dtrace_canload(src, size, mstate, vstate)) {
|
---|
3407 | regs[rd] = NULL;
|
---|
3408 | break;
|
---|
3409 | }
|
---|
3410 |
|
---|
3411 | dtrace_bcopy((void *)src, (void *)dest, size);
|
---|
3412 | break;
|
---|
3413 | }
|
---|
3414 |
|
---|
3415 | case DIF_SUBR_ALLOCA:
|
---|
3416 | case DIF_SUBR_COPYIN: {
|
---|
3417 | uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
|
---|
3418 | uint64_t size =
|
---|
3419 | tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
|
---|
3420 | size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
|
---|
3421 |
|
---|
3422 | /*
|
---|
3423 | * This action doesn't require any credential checks since
|
---|
3424 | * probes will not activate in user contexts to which the
|
---|
3425 | * enabling user does not have permissions.
|
---|
3426 | */
|
---|
3427 |
|
---|
3428 | /*
|
---|
3429 | * Rounding up the user allocation size could have overflowed
|
---|
3430 | * a large, bogus allocation (like -1ULL) to 0.
|
---|
3431 | */
|
---|
3432 | if (scratch_size < size ||
|
---|
3433 | !DTRACE_INSCRATCH(mstate, scratch_size)) {
|
---|
3434 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3435 | regs[rd] = NULL;
|
---|
3436 | break;
|
---|
3437 | }
|
---|
3438 |
|
---|
3439 | if (subr == DIF_SUBR_COPYIN) {
|
---|
3440 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3441 | dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
|
---|
3442 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3443 | }
|
---|
3444 |
|
---|
3445 | mstate->dtms_scratch_ptr += scratch_size;
|
---|
3446 | regs[rd] = dest;
|
---|
3447 | break;
|
---|
3448 | }
|
---|
3449 |
|
---|
3450 | case DIF_SUBR_COPYINTO: {
|
---|
3451 | uint64_t size = tupregs[1].dttk_value;
|
---|
3452 | uintptr_t dest = tupregs[2].dttk_value;
|
---|
3453 |
|
---|
3454 | /*
|
---|
3455 | * This action doesn't require any credential checks since
|
---|
3456 | * probes will not activate in user contexts to which the
|
---|
3457 | * enabling user does not have permissions.
|
---|
3458 | */
|
---|
3459 | if (!dtrace_inscratch(dest, size, mstate)) {
|
---|
3460 | *flags |= CPU_DTRACE_BADADDR;
|
---|
3461 | *illval = regs[rd];
|
---|
3462 | break;
|
---|
3463 | }
|
---|
3464 |
|
---|
3465 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3466 | dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
|
---|
3467 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3468 | break;
|
---|
3469 | }
|
---|
3470 |
|
---|
3471 | case DIF_SUBR_COPYINSTR: {
|
---|
3472 | uintptr_t dest = mstate->dtms_scratch_ptr;
|
---|
3473 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3474 |
|
---|
3475 | if (nargs > 1 && tupregs[1].dttk_value < size)
|
---|
3476 | size = tupregs[1].dttk_value + 1;
|
---|
3477 |
|
---|
3478 | /*
|
---|
3479 | * This action doesn't require any credential checks since
|
---|
3480 | * probes will not activate in user contexts to which the
|
---|
3481 | * enabling user does not have permissions.
|
---|
3482 | */
|
---|
3483 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
3484 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3485 | regs[rd] = NULL;
|
---|
3486 | break;
|
---|
3487 | }
|
---|
3488 |
|
---|
3489 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3490 | dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
|
---|
3491 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3492 |
|
---|
3493 | ((char *)dest)[size - 1] = '\0';
|
---|
3494 | mstate->dtms_scratch_ptr += size;
|
---|
3495 | regs[rd] = dest;
|
---|
3496 | break;
|
---|
3497 | }
|
---|
3498 |
|
---|
3499 | case DIF_SUBR_MSGSIZE:
|
---|
3500 | case DIF_SUBR_MSGDSIZE: {
|
---|
3501 | #ifndef VBOX
|
---|
3502 | uintptr_t baddr = tupregs[0].dttk_value, daddr;
|
---|
3503 | uintptr_t wptr, rptr;
|
---|
3504 | size_t count = 0;
|
---|
3505 | int cont = 0;
|
---|
3506 |
|
---|
3507 | while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
3508 |
|
---|
3509 | if (!dtrace_canload(baddr, sizeof (mblk_t), mstate,
|
---|
3510 | vstate)) {
|
---|
3511 | regs[rd] = NULL;
|
---|
3512 | break;
|
---|
3513 | }
|
---|
3514 |
|
---|
3515 | wptr = dtrace_loadptr(baddr +
|
---|
3516 | offsetof(mblk_t, b_wptr));
|
---|
3517 |
|
---|
3518 | rptr = dtrace_loadptr(baddr +
|
---|
3519 | offsetof(mblk_t, b_rptr));
|
---|
3520 |
|
---|
3521 | if (wptr < rptr) {
|
---|
3522 | *flags |= CPU_DTRACE_BADADDR;
|
---|
3523 | *illval = tupregs[0].dttk_value;
|
---|
3524 | break;
|
---|
3525 | }
|
---|
3526 |
|
---|
3527 | daddr = dtrace_loadptr(baddr +
|
---|
3528 | offsetof(mblk_t, b_datap));
|
---|
3529 |
|
---|
3530 | baddr = dtrace_loadptr(baddr +
|
---|
3531 | offsetof(mblk_t, b_cont));
|
---|
3532 |
|
---|
3533 | /*
|
---|
3534 | * We want to prevent against denial-of-service here,
|
---|
3535 | * so we're only going to search the list for
|
---|
3536 | * dtrace_msgdsize_max mblks.
|
---|
3537 | */
|
---|
3538 | if (cont++ > dtrace_msgdsize_max) {
|
---|
3539 | *flags |= CPU_DTRACE_ILLOP;
|
---|
3540 | break;
|
---|
3541 | }
|
---|
3542 |
|
---|
3543 | if (subr == DIF_SUBR_MSGDSIZE) {
|
---|
3544 | if (dtrace_load8(daddr +
|
---|
3545 | offsetof(dblk_t, db_type)) != M_DATA)
|
---|
3546 | continue;
|
---|
3547 | }
|
---|
3548 |
|
---|
3549 | count += wptr - rptr;
|
---|
3550 | }
|
---|
3551 |
|
---|
3552 | if (!(*flags & CPU_DTRACE_FAULT))
|
---|
3553 | regs[rd] = count;
|
---|
3554 |
|
---|
3555 | #else
|
---|
3556 | regs[rd] = 0;
|
---|
3557 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3558 | #endif
|
---|
3559 | break;
|
---|
3560 | }
|
---|
3561 |
|
---|
3562 | case DIF_SUBR_PROGENYOF: {
|
---|
3563 | #ifndef VBOX
|
---|
3564 | pid_t pid = tupregs[0].dttk_value;
|
---|
3565 | proc_t *p;
|
---|
3566 | int rval = 0;
|
---|
3567 |
|
---|
3568 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3569 |
|
---|
3570 | for (p = curthread->t_procp; p != NULL; p = p->p_parent) {
|
---|
3571 | if (p->p_pidp->pid_id == pid) {
|
---|
3572 | rval = 1;
|
---|
3573 | break;
|
---|
3574 | }
|
---|
3575 | }
|
---|
3576 |
|
---|
3577 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3578 |
|
---|
3579 | regs[rd] = rval;
|
---|
3580 | #else
|
---|
3581 | regs[rd] = 0;
|
---|
3582 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3583 | #endif
|
---|
3584 | break;
|
---|
3585 | }
|
---|
3586 |
|
---|
3587 | case DIF_SUBR_SPECULATION:
|
---|
3588 | regs[rd] = dtrace_speculation(state);
|
---|
3589 | break;
|
---|
3590 |
|
---|
3591 | case DIF_SUBR_COPYOUT: {
|
---|
3592 | uintptr_t kaddr = tupregs[0].dttk_value;
|
---|
3593 | uintptr_t uaddr = tupregs[1].dttk_value;
|
---|
3594 | uint64_t size = tupregs[2].dttk_value;
|
---|
3595 |
|
---|
3596 | if (!dtrace_destructive_disallow &&
|
---|
3597 | dtrace_priv_proc_control(state) &&
|
---|
3598 | !dtrace_istoxic(kaddr, size)) {
|
---|
3599 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3600 | dtrace_copyout(kaddr, uaddr, size, flags);
|
---|
3601 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3602 | }
|
---|
3603 | break;
|
---|
3604 | }
|
---|
3605 |
|
---|
3606 | case DIF_SUBR_COPYOUTSTR: {
|
---|
3607 | uintptr_t kaddr = tupregs[0].dttk_value;
|
---|
3608 | uintptr_t uaddr = tupregs[1].dttk_value;
|
---|
3609 | uint64_t size = tupregs[2].dttk_value;
|
---|
3610 |
|
---|
3611 | if (!dtrace_destructive_disallow &&
|
---|
3612 | dtrace_priv_proc_control(state) &&
|
---|
3613 | !dtrace_istoxic(kaddr, size)) {
|
---|
3614 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
|
---|
3615 | dtrace_copyoutstr(kaddr, uaddr, size, flags);
|
---|
3616 | DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
|
---|
3617 | }
|
---|
3618 | break;
|
---|
3619 | }
|
---|
3620 |
|
---|
3621 | case DIF_SUBR_STRLEN: {
|
---|
3622 | size_t sz;
|
---|
3623 | uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
|
---|
3624 | sz = dtrace_strlen((char *)addr,
|
---|
3625 | state->dts_options[DTRACEOPT_STRSIZE]);
|
---|
3626 |
|
---|
3627 | if (!dtrace_canload(addr, sz + 1, mstate, vstate)) {
|
---|
3628 | regs[rd] = NULL;
|
---|
3629 | break;
|
---|
3630 | }
|
---|
3631 |
|
---|
3632 | regs[rd] = sz;
|
---|
3633 |
|
---|
3634 | break;
|
---|
3635 | }
|
---|
3636 |
|
---|
3637 | case DIF_SUBR_STRCHR:
|
---|
3638 | case DIF_SUBR_STRRCHR: {
|
---|
3639 | /*
|
---|
3640 | * We're going to iterate over the string looking for the
|
---|
3641 | * specified character. We will iterate until we have reached
|
---|
3642 | * the string length or we have found the character. If this
|
---|
3643 | * is DIF_SUBR_STRRCHR, we will look for the last occurrence
|
---|
3644 | * of the specified character instead of the first.
|
---|
3645 | */
|
---|
3646 | uintptr_t saddr = tupregs[0].dttk_value;
|
---|
3647 | uintptr_t addr = tupregs[0].dttk_value;
|
---|
3648 | uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3649 | char c, target = (char)tupregs[1].dttk_value;
|
---|
3650 |
|
---|
3651 | for (regs[rd] = NULL; addr < limit; addr++) {
|
---|
3652 | if ((c = dtrace_load8(addr)) == target) {
|
---|
3653 | regs[rd] = addr;
|
---|
3654 |
|
---|
3655 | if (subr == DIF_SUBR_STRCHR)
|
---|
3656 | break;
|
---|
3657 | }
|
---|
3658 |
|
---|
3659 | if (c == '\0')
|
---|
3660 | break;
|
---|
3661 | }
|
---|
3662 |
|
---|
3663 | if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) {
|
---|
3664 | regs[rd] = NULL;
|
---|
3665 | break;
|
---|
3666 | }
|
---|
3667 |
|
---|
3668 | break;
|
---|
3669 | }
|
---|
3670 |
|
---|
3671 | case DIF_SUBR_STRSTR:
|
---|
3672 | case DIF_SUBR_INDEX:
|
---|
3673 | case DIF_SUBR_RINDEX: {
|
---|
3674 | /*
|
---|
3675 | * We're going to iterate over the string looking for the
|
---|
3676 | * specified string. We will iterate until we have reached
|
---|
3677 | * the string length or we have found the string. (Yes, this
|
---|
3678 | * is done in the most naive way possible -- but considering
|
---|
3679 | * that the string we're searching for is likely to be
|
---|
3680 | * relatively short, the complexity of Rabin-Karp or similar
|
---|
3681 | * hardly seems merited.)
|
---|
3682 | */
|
---|
3683 | char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
|
---|
3684 | char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
|
---|
3685 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3686 | size_t len = dtrace_strlen(addr, size);
|
---|
3687 | size_t sublen = dtrace_strlen(substr, size);
|
---|
3688 | char *limit = addr + len, *orig = addr;
|
---|
3689 | int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
|
---|
3690 | int inc = 1;
|
---|
3691 |
|
---|
3692 | regs[rd] = notfound;
|
---|
3693 |
|
---|
3694 | if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
|
---|
3695 | regs[rd] = NULL;
|
---|
3696 | break;
|
---|
3697 | }
|
---|
3698 |
|
---|
3699 | if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
|
---|
3700 | vstate)) {
|
---|
3701 | regs[rd] = NULL;
|
---|
3702 | break;
|
---|
3703 | }
|
---|
3704 |
|
---|
3705 | /*
|
---|
3706 | * strstr() and index()/rindex() have similar semantics if
|
---|
3707 | * both strings are the empty string: strstr() returns a
|
---|
3708 | * pointer to the (empty) string, and index() and rindex()
|
---|
3709 | * both return index 0 (regardless of any position argument).
|
---|
3710 | */
|
---|
3711 | if (sublen == 0 && len == 0) {
|
---|
3712 | if (subr == DIF_SUBR_STRSTR)
|
---|
3713 | regs[rd] = (uintptr_t)addr;
|
---|
3714 | else
|
---|
3715 | regs[rd] = 0;
|
---|
3716 | break;
|
---|
3717 | }
|
---|
3718 |
|
---|
3719 | if (subr != DIF_SUBR_STRSTR) {
|
---|
3720 | if (subr == DIF_SUBR_RINDEX) {
|
---|
3721 | limit = orig - 1;
|
---|
3722 | addr += len;
|
---|
3723 | inc = -1;
|
---|
3724 | }
|
---|
3725 |
|
---|
3726 | /*
|
---|
3727 | * Both index() and rindex() take an optional position
|
---|
3728 | * argument that denotes the starting position.
|
---|
3729 | */
|
---|
3730 | if (nargs == 3) {
|
---|
3731 | int64_t pos = (int64_t)tupregs[2].dttk_value;
|
---|
3732 |
|
---|
3733 | /*
|
---|
3734 | * If the position argument to index() is
|
---|
3735 | * negative, Perl implicitly clamps it at
|
---|
3736 | * zero. This semantic is a little surprising
|
---|
3737 | * given the special meaning of negative
|
---|
3738 | * positions to similar Perl functions like
|
---|
3739 | * substr(), but it appears to reflect a
|
---|
3740 | * notion that index() can start from a
|
---|
3741 | * negative index and increment its way up to
|
---|
3742 | * the string. Given this notion, Perl's
|
---|
3743 | * rindex() is at least self-consistent in
|
---|
3744 | * that it implicitly clamps positions greater
|
---|
3745 | * than the string length to be the string
|
---|
3746 | * length. Where Perl completely loses
|
---|
3747 | * coherence, however, is when the specified
|
---|
3748 | * substring is the empty string (""). In
|
---|
3749 | * this case, even if the position is
|
---|
3750 | * negative, rindex() returns 0 -- and even if
|
---|
3751 | * the position is greater than the length,
|
---|
3752 | * index() returns the string length. These
|
---|
3753 | * semantics violate the notion that index()
|
---|
3754 | * should never return a value less than the
|
---|
3755 | * specified position and that rindex() should
|
---|
3756 | * never return a value greater than the
|
---|
3757 | * specified position. (One assumes that
|
---|
3758 | * these semantics are artifacts of Perl's
|
---|
3759 | * implementation and not the results of
|
---|
3760 | * deliberate design -- it beggars belief that
|
---|
3761 | * even Larry Wall could desire such oddness.)
|
---|
3762 | * While in the abstract one would wish for
|
---|
3763 | * consistent position semantics across
|
---|
3764 | * substr(), index() and rindex() -- or at the
|
---|
3765 | * very least self-consistent position
|
---|
3766 | * semantics for index() and rindex() -- we
|
---|
3767 | * instead opt to keep with the extant Perl
|
---|
3768 | * semantics, in all their broken glory. (Do
|
---|
3769 | * we have more desire to maintain Perl's
|
---|
3770 | * semantics than Perl does? Probably.)
|
---|
3771 | */
|
---|
3772 | if (subr == DIF_SUBR_RINDEX) {
|
---|
3773 | if (pos < 0) {
|
---|
3774 | if (sublen == 0)
|
---|
3775 | regs[rd] = 0;
|
---|
3776 | break;
|
---|
3777 | }
|
---|
3778 |
|
---|
3779 | if (VBDTCAST(uint64_t)pos > len)
|
---|
3780 | pos = len;
|
---|
3781 | } else {
|
---|
3782 | if (pos < 0)
|
---|
3783 | pos = 0;
|
---|
3784 |
|
---|
3785 | if (VBDTCAST(uint64_t)pos >= len) {
|
---|
3786 | if (sublen == 0)
|
---|
3787 | regs[rd] = len;
|
---|
3788 | break;
|
---|
3789 | }
|
---|
3790 | }
|
---|
3791 |
|
---|
3792 | addr = orig + pos;
|
---|
3793 | }
|
---|
3794 | }
|
---|
3795 |
|
---|
3796 | for (regs[rd] = notfound; addr != limit; addr += inc) {
|
---|
3797 | if (dtrace_strncmp(addr, substr, sublen) == 0) {
|
---|
3798 | if (subr != DIF_SUBR_STRSTR) {
|
---|
3799 | /*
|
---|
3800 | * As D index() and rindex() are
|
---|
3801 | * modeled on Perl (and not on awk),
|
---|
3802 | * we return a zero-based (and not a
|
---|
3803 | * one-based) index. (For you Perl
|
---|
3804 | * weenies: no, we're not going to add
|
---|
3805 | * $[ -- and shouldn't you be at a con
|
---|
3806 | * or something?)
|
---|
3807 | */
|
---|
3808 | regs[rd] = (uintptr_t)(addr - orig);
|
---|
3809 | break;
|
---|
3810 | }
|
---|
3811 |
|
---|
3812 | ASSERT(subr == DIF_SUBR_STRSTR);
|
---|
3813 | regs[rd] = (uintptr_t)addr;
|
---|
3814 | break;
|
---|
3815 | }
|
---|
3816 | }
|
---|
3817 |
|
---|
3818 | break;
|
---|
3819 | }
|
---|
3820 |
|
---|
3821 | case DIF_SUBR_STRTOK: {
|
---|
3822 | uintptr_t addr = tupregs[0].dttk_value;
|
---|
3823 | uintptr_t tokaddr = tupregs[1].dttk_value;
|
---|
3824 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3825 | uintptr_t limit, toklimit = tokaddr + size;
|
---|
3826 | uint8_t c VBDTUNASS(0), tokmap[32]; /* 256 / 8 */
|
---|
3827 | char *dest = (char *)mstate->dtms_scratch_ptr;
|
---|
3828 | VBDTTYPE(unsigned,int) i;
|
---|
3829 |
|
---|
3830 | /*
|
---|
3831 | * Check both the token buffer and (later) the input buffer,
|
---|
3832 | * since both could be non-scratch addresses.
|
---|
3833 | */
|
---|
3834 | if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) {
|
---|
3835 | regs[rd] = NULL;
|
---|
3836 | break;
|
---|
3837 | }
|
---|
3838 |
|
---|
3839 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
3840 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3841 | regs[rd] = NULL;
|
---|
3842 | break;
|
---|
3843 | }
|
---|
3844 |
|
---|
3845 | if (addr == NULL) {
|
---|
3846 | /*
|
---|
3847 | * If the address specified is NULL, we use our saved
|
---|
3848 | * strtok pointer from the mstate. Note that this
|
---|
3849 | * means that the saved strtok pointer is _only_
|
---|
3850 | * valid within multiple enablings of the same probe --
|
---|
3851 | * it behaves like an implicit clause-local variable.
|
---|
3852 | */
|
---|
3853 | addr = mstate->dtms_strtok;
|
---|
3854 | } else {
|
---|
3855 | /*
|
---|
3856 | * If the user-specified address is non-NULL we must
|
---|
3857 | * access check it. This is the only time we have
|
---|
3858 | * a chance to do so, since this address may reside
|
---|
3859 | * in the string table of this clause-- future calls
|
---|
3860 | * (when we fetch addr from mstate->dtms_strtok)
|
---|
3861 | * would fail this access check.
|
---|
3862 | */
|
---|
3863 | if (!dtrace_strcanload(addr, size, mstate, vstate)) {
|
---|
3864 | regs[rd] = NULL;
|
---|
3865 | break;
|
---|
3866 | }
|
---|
3867 | }
|
---|
3868 |
|
---|
3869 | /*
|
---|
3870 | * First, zero the token map, and then process the token
|
---|
3871 | * string -- setting a bit in the map for every character
|
---|
3872 | * found in the token string.
|
---|
3873 | */
|
---|
3874 | for (i = 0; i < sizeof (tokmap); i++)
|
---|
3875 | tokmap[i] = 0;
|
---|
3876 |
|
---|
3877 | for (; tokaddr < toklimit; tokaddr++) {
|
---|
3878 | if ((c = dtrace_load8(tokaddr)) == '\0')
|
---|
3879 | break;
|
---|
3880 |
|
---|
3881 | ASSERT((c >> 3) < sizeof (tokmap));
|
---|
3882 | tokmap[c >> 3] |= (1 << (c & 0x7));
|
---|
3883 | }
|
---|
3884 |
|
---|
3885 | for (limit = addr + size; addr < limit; addr++) {
|
---|
3886 | /*
|
---|
3887 | * We're looking for a character that is _not_ contained
|
---|
3888 | * in the token string.
|
---|
3889 | */
|
---|
3890 | if ((c = dtrace_load8(addr)) == '\0')
|
---|
3891 | break;
|
---|
3892 |
|
---|
3893 | if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
|
---|
3894 | break;
|
---|
3895 | }
|
---|
3896 |
|
---|
3897 | if (c == '\0') {
|
---|
3898 | /*
|
---|
3899 | * We reached the end of the string without finding
|
---|
3900 | * any character that was not in the token string.
|
---|
3901 | * We return NULL in this case, and we set the saved
|
---|
3902 | * address to NULL as well.
|
---|
3903 | */
|
---|
3904 | regs[rd] = NULL;
|
---|
3905 | mstate->dtms_strtok = NULL;
|
---|
3906 | break;
|
---|
3907 | }
|
---|
3908 |
|
---|
3909 | /*
|
---|
3910 | * From here on, we're copying into the destination string.
|
---|
3911 | */
|
---|
3912 | for (i = 0; addr < limit && i < size - 1; addr++) {
|
---|
3913 | if ((c = dtrace_load8(addr)) == '\0')
|
---|
3914 | break;
|
---|
3915 |
|
---|
3916 | if (tokmap[c >> 3] & (1 << (c & 0x7)))
|
---|
3917 | break;
|
---|
3918 |
|
---|
3919 | ASSERT(i < size);
|
---|
3920 | dest[i++] = c;
|
---|
3921 | }
|
---|
3922 |
|
---|
3923 | ASSERT(i < size);
|
---|
3924 | dest[i] = '\0';
|
---|
3925 | regs[rd] = (uintptr_t)dest;
|
---|
3926 | mstate->dtms_scratch_ptr += size;
|
---|
3927 | mstate->dtms_strtok = addr;
|
---|
3928 | break;
|
---|
3929 | }
|
---|
3930 |
|
---|
3931 | case DIF_SUBR_SUBSTR: {
|
---|
3932 | uintptr_t s = tupregs[0].dttk_value;
|
---|
3933 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
3934 | char *d = (char *)mstate->dtms_scratch_ptr;
|
---|
3935 | int64_t index = (int64_t)tupregs[1].dttk_value;
|
---|
3936 | int64_t remaining = (int64_t)tupregs[2].dttk_value;
|
---|
3937 | size_t len = dtrace_strlen((char *)s, size);
|
---|
3938 | int64_t i;
|
---|
3939 |
|
---|
3940 | if (!dtrace_canload(s, len + 1, mstate, vstate)) {
|
---|
3941 | regs[rd] = NULL;
|
---|
3942 | break;
|
---|
3943 | }
|
---|
3944 |
|
---|
3945 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
3946 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
3947 | regs[rd] = NULL;
|
---|
3948 | break;
|
---|
3949 | }
|
---|
3950 |
|
---|
3951 | if (nargs <= 2)
|
---|
3952 | remaining = (int64_t)size;
|
---|
3953 |
|
---|
3954 | if (index < 0) {
|
---|
3955 | index += len;
|
---|
3956 |
|
---|
3957 | if (index < 0 && index + remaining > 0) {
|
---|
3958 | remaining += index;
|
---|
3959 | index = 0;
|
---|
3960 | }
|
---|
3961 | }
|
---|
3962 |
|
---|
3963 | if (VBDTCAST(uint64_t)index >= len || index < 0) {
|
---|
3964 | remaining = 0;
|
---|
3965 | } else if (remaining < 0) {
|
---|
3966 | remaining += len - index;
|
---|
3967 | } else if (VBDTCAST(uint64_t)index + remaining > size) {
|
---|
3968 | remaining = size - index;
|
---|
3969 | }
|
---|
3970 |
|
---|
3971 | for (i = 0; i < remaining; i++) {
|
---|
3972 | if ((d[i] = dtrace_load8(s + index + i)) == '\0')
|
---|
3973 | break;
|
---|
3974 | }
|
---|
3975 |
|
---|
3976 | d[i] = '\0';
|
---|
3977 |
|
---|
3978 | mstate->dtms_scratch_ptr += size;
|
---|
3979 | regs[rd] = (uintptr_t)d;
|
---|
3980 | break;
|
---|
3981 | }
|
---|
3982 |
|
---|
3983 | case DIF_SUBR_GETMAJOR:
|
---|
3984 | #ifndef VBOX
|
---|
3985 | #ifdef _LP64
|
---|
3986 | regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;
|
---|
3987 | #else
|
---|
3988 | regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;
|
---|
3989 | #endif
|
---|
3990 | #else
|
---|
3991 | regs[rd] = 0;
|
---|
3992 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
3993 | #endif
|
---|
3994 | break;
|
---|
3995 |
|
---|
3996 | case DIF_SUBR_GETMINOR:
|
---|
3997 | #ifndef VBOX
|
---|
3998 | #ifdef _LP64
|
---|
3999 | regs[rd] = tupregs[0].dttk_value & MAXMIN64;
|
---|
4000 | #else
|
---|
4001 | regs[rd] = tupregs[0].dttk_value & MAXMIN;
|
---|
4002 | #endif
|
---|
4003 | #else
|
---|
4004 | regs[rd] = 0;
|
---|
4005 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4006 | #endif
|
---|
4007 | break;
|
---|
4008 |
|
---|
4009 | case DIF_SUBR_DDI_PATHNAME: {
|
---|
4010 | #ifndef VBOX
|
---|
4011 | /*
|
---|
4012 | * This one is a galactic mess. We are going to roughly
|
---|
4013 | * emulate ddi_pathname(), but it's made more complicated
|
---|
4014 | * by the fact that we (a) want to include the minor name and
|
---|
4015 | * (b) must proceed iteratively instead of recursively.
|
---|
4016 | */
|
---|
4017 | uintptr_t dest = mstate->dtms_scratch_ptr;
|
---|
4018 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4019 | char *start = (char *)dest, *end = start + size - 1;
|
---|
4020 | uintptr_t daddr = tupregs[0].dttk_value;
|
---|
4021 | int64_t minor = (int64_t)tupregs[1].dttk_value;
|
---|
4022 | char *s;
|
---|
4023 | int i, len, depth = 0;
|
---|
4024 |
|
---|
4025 | /*
|
---|
4026 | * Due to all the pointer jumping we do and context we must
|
---|
4027 | * rely upon, we just mandate that the user must have kernel
|
---|
4028 | * read privileges to use this routine.
|
---|
4029 | */
|
---|
4030 | if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
|
---|
4031 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4032 | *illval = daddr;
|
---|
4033 | regs[rd] = NULL;
|
---|
4034 | }
|
---|
4035 |
|
---|
4036 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4037 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4038 | regs[rd] = NULL;
|
---|
4039 | break;
|
---|
4040 | }
|
---|
4041 |
|
---|
4042 | *end = '\0';
|
---|
4043 |
|
---|
4044 | /*
|
---|
4045 | * We want to have a name for the minor. In order to do this,
|
---|
4046 | * we need to walk the minor list from the devinfo. We want
|
---|
4047 | * to be sure that we don't infinitely walk a circular list,
|
---|
4048 | * so we check for circularity by sending a scout pointer
|
---|
4049 | * ahead two elements for every element that we iterate over;
|
---|
4050 | * if the list is circular, these will ultimately point to the
|
---|
4051 | * same element. You may recognize this little trick as the
|
---|
4052 | * answer to a stupid interview question -- one that always
|
---|
4053 | * seems to be asked by those who had to have it laboriously
|
---|
4054 | * explained to them, and who can't even concisely describe
|
---|
4055 | * the conditions under which one would be forced to resort to
|
---|
4056 | * this technique. Needless to say, those conditions are
|
---|
4057 | * found here -- and probably only here. Is this the only use
|
---|
4058 | * of this infamous trick in shipping, production code? If it
|
---|
4059 | * isn't, it probably should be...
|
---|
4060 | */
|
---|
4061 | if (minor != -1) {
|
---|
4062 | uintptr_t maddr = dtrace_loadptr(daddr +
|
---|
4063 | offsetof(struct dev_info, devi_minor));
|
---|
4064 |
|
---|
4065 | uintptr_t next = offsetof(struct ddi_minor_data, next);
|
---|
4066 | uintptr_t name = offsetof(struct ddi_minor_data,
|
---|
4067 | d_minor) + offsetof(struct ddi_minor, name);
|
---|
4068 | uintptr_t dev = offsetof(struct ddi_minor_data,
|
---|
4069 | d_minor) + offsetof(struct ddi_minor, dev);
|
---|
4070 | uintptr_t scout;
|
---|
4071 |
|
---|
4072 | if (maddr != NULL)
|
---|
4073 | scout = dtrace_loadptr(maddr + next);
|
---|
4074 |
|
---|
4075 | while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
4076 | uint64_t m;
|
---|
4077 | #ifdef _LP64
|
---|
4078 | m = dtrace_load64(maddr + dev) & MAXMIN64;
|
---|
4079 | #else
|
---|
4080 | m = dtrace_load32(maddr + dev) & MAXMIN;
|
---|
4081 | #endif
|
---|
4082 | if (m != minor) {
|
---|
4083 | maddr = dtrace_loadptr(maddr + next);
|
---|
4084 |
|
---|
4085 | if (scout == NULL)
|
---|
4086 | continue;
|
---|
4087 |
|
---|
4088 | scout = dtrace_loadptr(scout + next);
|
---|
4089 |
|
---|
4090 | if (scout == NULL)
|
---|
4091 | continue;
|
---|
4092 |
|
---|
4093 | scout = dtrace_loadptr(scout + next);
|
---|
4094 |
|
---|
4095 | if (scout == NULL)
|
---|
4096 | continue;
|
---|
4097 |
|
---|
4098 | if (scout == maddr) {
|
---|
4099 | *flags |= CPU_DTRACE_ILLOP;
|
---|
4100 | break;
|
---|
4101 | }
|
---|
4102 |
|
---|
4103 | continue;
|
---|
4104 | }
|
---|
4105 |
|
---|
4106 | /*
|
---|
4107 | * We have the minor data. Now we need to
|
---|
4108 | * copy the minor's name into the end of the
|
---|
4109 | * pathname.
|
---|
4110 | */
|
---|
4111 | s = (char *)dtrace_loadptr(maddr + name);
|
---|
4112 | len = dtrace_strlen(s, size);
|
---|
4113 |
|
---|
4114 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4115 | break;
|
---|
4116 |
|
---|
4117 | if (len != 0) {
|
---|
4118 | if ((end -= (len + 1)) < start)
|
---|
4119 | break;
|
---|
4120 |
|
---|
4121 | *end = ':';
|
---|
4122 | }
|
---|
4123 |
|
---|
4124 | for (i = 1; i <= len; i++)
|
---|
4125 | end[i] = dtrace_load8((uintptr_t)s++);
|
---|
4126 | break;
|
---|
4127 | }
|
---|
4128 | }
|
---|
4129 |
|
---|
4130 | while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
4131 | ddi_node_state_t devi_state;
|
---|
4132 |
|
---|
4133 | devi_state = dtrace_load32(daddr +
|
---|
4134 | offsetof(struct dev_info, devi_node_state));
|
---|
4135 |
|
---|
4136 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4137 | break;
|
---|
4138 |
|
---|
4139 | if (devi_state >= DS_INITIALIZED) {
|
---|
4140 | s = (char *)dtrace_loadptr(daddr +
|
---|
4141 | offsetof(struct dev_info, devi_addr));
|
---|
4142 | len = dtrace_strlen(s, size);
|
---|
4143 |
|
---|
4144 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4145 | break;
|
---|
4146 |
|
---|
4147 | if (len != 0) {
|
---|
4148 | if ((end -= (len + 1)) < start)
|
---|
4149 | break;
|
---|
4150 |
|
---|
4151 | *end = '@';
|
---|
4152 | }
|
---|
4153 |
|
---|
4154 | for (i = 1; i <= len; i++)
|
---|
4155 | end[i] = dtrace_load8((uintptr_t)s++);
|
---|
4156 | }
|
---|
4157 |
|
---|
4158 | /*
|
---|
4159 | * Now for the node name...
|
---|
4160 | */
|
---|
4161 | s = (char *)dtrace_loadptr(daddr +
|
---|
4162 | offsetof(struct dev_info, devi_node_name));
|
---|
4163 |
|
---|
4164 | daddr = dtrace_loadptr(daddr +
|
---|
4165 | offsetof(struct dev_info, devi_parent));
|
---|
4166 |
|
---|
4167 | /*
|
---|
4168 | * If our parent is NULL (that is, if we're the root
|
---|
4169 | * node), we're going to use the special path
|
---|
4170 | * "devices".
|
---|
4171 | */
|
---|
4172 | if (daddr == NULL)
|
---|
4173 | s = "devices";
|
---|
4174 |
|
---|
4175 | len = dtrace_strlen(s, size);
|
---|
4176 | if (*flags & CPU_DTRACE_FAULT)
|
---|
4177 | break;
|
---|
4178 |
|
---|
4179 | if ((end -= (len + 1)) < start)
|
---|
4180 | break;
|
---|
4181 |
|
---|
4182 | for (i = 1; i <= len; i++)
|
---|
4183 | end[i] = dtrace_load8((uintptr_t)s++);
|
---|
4184 | *end = '/';
|
---|
4185 |
|
---|
4186 | if (depth++ > dtrace_devdepth_max) {
|
---|
4187 | *flags |= CPU_DTRACE_ILLOP;
|
---|
4188 | break;
|
---|
4189 | }
|
---|
4190 | }
|
---|
4191 |
|
---|
4192 | if (end < start)
|
---|
4193 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4194 |
|
---|
4195 | if (daddr == NULL) {
|
---|
4196 | regs[rd] = (uintptr_t)end;
|
---|
4197 | mstate->dtms_scratch_ptr += size;
|
---|
4198 | }
|
---|
4199 |
|
---|
4200 | #else
|
---|
4201 | regs[rd] = 0;
|
---|
4202 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4203 | #endif
|
---|
4204 | break;
|
---|
4205 | }
|
---|
4206 |
|
---|
4207 | case DIF_SUBR_STRJOIN: {
|
---|
4208 | char *d = (char *)mstate->dtms_scratch_ptr;
|
---|
4209 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4210 | uintptr_t s1 = tupregs[0].dttk_value;
|
---|
4211 | uintptr_t s2 = tupregs[1].dttk_value;
|
---|
4212 | VBDTTYPE(unsigned,int) i = 0;
|
---|
4213 |
|
---|
4214 | if (!dtrace_strcanload(s1, size, mstate, vstate) ||
|
---|
4215 | !dtrace_strcanload(s2, size, mstate, vstate)) {
|
---|
4216 | regs[rd] = NULL;
|
---|
4217 | break;
|
---|
4218 | }
|
---|
4219 |
|
---|
4220 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4221 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4222 | regs[rd] = NULL;
|
---|
4223 | break;
|
---|
4224 | }
|
---|
4225 |
|
---|
4226 | for (;;) {
|
---|
4227 | if (i >= size) {
|
---|
4228 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4229 | regs[rd] = NULL;
|
---|
4230 | break;
|
---|
4231 | }
|
---|
4232 |
|
---|
4233 | if ((d[i++] = dtrace_load8(s1++)) == '\0') {
|
---|
4234 | i--;
|
---|
4235 | break;
|
---|
4236 | }
|
---|
4237 | }
|
---|
4238 |
|
---|
4239 | for (;;) {
|
---|
4240 | if (i >= size) {
|
---|
4241 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4242 | regs[rd] = NULL;
|
---|
4243 | break;
|
---|
4244 | }
|
---|
4245 |
|
---|
4246 | if ((d[i++] = dtrace_load8(s2++)) == '\0')
|
---|
4247 | break;
|
---|
4248 | }
|
---|
4249 |
|
---|
4250 | if (i < size) {
|
---|
4251 | mstate->dtms_scratch_ptr += i;
|
---|
4252 | regs[rd] = (uintptr_t)d;
|
---|
4253 | }
|
---|
4254 |
|
---|
4255 | break;
|
---|
4256 | }
|
---|
4257 |
|
---|
4258 | case DIF_SUBR_LLTOSTR: {
|
---|
4259 | int64_t i = (int64_t)tupregs[0].dttk_value;
|
---|
4260 | int64_t val = i < 0 ? i * -1 : i;
|
---|
4261 | uint64_t size = 22; /* enough room for 2^64 in decimal */
|
---|
4262 | char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
|
---|
4263 |
|
---|
4264 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4265 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4266 | regs[rd] = NULL;
|
---|
4267 | break;
|
---|
4268 | }
|
---|
4269 |
|
---|
4270 | for (*end-- = '\0'; val; val /= 10)
|
---|
4271 | *end-- = '0' + (val % 10);
|
---|
4272 |
|
---|
4273 | if (i == 0)
|
---|
4274 | *end-- = '0';
|
---|
4275 |
|
---|
4276 | if (i < 0)
|
---|
4277 | *end-- = '-';
|
---|
4278 |
|
---|
4279 | regs[rd] = (uintptr_t)end + 1;
|
---|
4280 | mstate->dtms_scratch_ptr += size;
|
---|
4281 | break;
|
---|
4282 | }
|
---|
4283 |
|
---|
4284 | case DIF_SUBR_HTONS:
|
---|
4285 | case DIF_SUBR_NTOHS:
|
---|
4286 | #ifdef _BIG_ENDIAN
|
---|
4287 | regs[rd] = (uint16_t)tupregs[0].dttk_value;
|
---|
4288 | #else
|
---|
4289 | regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
|
---|
4290 | #endif
|
---|
4291 | break;
|
---|
4292 |
|
---|
4293 |
|
---|
4294 | case DIF_SUBR_HTONL:
|
---|
4295 | case DIF_SUBR_NTOHL:
|
---|
4296 | #ifdef _BIG_ENDIAN
|
---|
4297 | regs[rd] = (uint32_t)tupregs[0].dttk_value;
|
---|
4298 | #else
|
---|
4299 | regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
|
---|
4300 | #endif
|
---|
4301 | break;
|
---|
4302 |
|
---|
4303 |
|
---|
4304 | case DIF_SUBR_HTONLL:
|
---|
4305 | case DIF_SUBR_NTOHLL:
|
---|
4306 | #ifdef _BIG_ENDIAN
|
---|
4307 | regs[rd] = (uint64_t)tupregs[0].dttk_value;
|
---|
4308 | #else
|
---|
4309 | regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
|
---|
4310 | #endif
|
---|
4311 | break;
|
---|
4312 |
|
---|
4313 |
|
---|
4314 | case DIF_SUBR_DIRNAME:
|
---|
4315 | case DIF_SUBR_BASENAME: {
|
---|
4316 | char *dest = (char *)mstate->dtms_scratch_ptr;
|
---|
4317 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4318 | uintptr_t src = tupregs[0].dttk_value;
|
---|
4319 | int i, j, len = VBDTCAST(int)dtrace_strlen((char *)src, size);
|
---|
4320 | int lastbase = -1, firstbase = -1, lastdir = -1;
|
---|
4321 | int start, end;
|
---|
4322 |
|
---|
4323 | if (!dtrace_canload(src, len + 1, mstate, vstate)) {
|
---|
4324 | regs[rd] = NULL;
|
---|
4325 | break;
|
---|
4326 | }
|
---|
4327 |
|
---|
4328 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4329 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4330 | regs[rd] = NULL;
|
---|
4331 | break;
|
---|
4332 | }
|
---|
4333 |
|
---|
4334 | /*
|
---|
4335 | * The basename and dirname for a zero-length string is
|
---|
4336 | * defined to be "."
|
---|
4337 | */
|
---|
4338 | if (len == 0) {
|
---|
4339 | len = 1;
|
---|
4340 | src = (uintptr_t)".";
|
---|
4341 | }
|
---|
4342 |
|
---|
4343 | /*
|
---|
4344 | * Start from the back of the string, moving back toward the
|
---|
4345 | * front until we see a character that isn't a slash. That
|
---|
4346 | * character is the last character in the basename.
|
---|
4347 | */
|
---|
4348 | for (i = len - 1; i >= 0; i--) {
|
---|
4349 | if (dtrace_load8(src + i) != '/')
|
---|
4350 | break;
|
---|
4351 | }
|
---|
4352 |
|
---|
4353 | if (i >= 0)
|
---|
4354 | lastbase = i;
|
---|
4355 |
|
---|
4356 | /*
|
---|
4357 | * Starting from the last character in the basename, move
|
---|
4358 | * towards the front until we find a slash. The character
|
---|
4359 | * that we processed immediately before that is the first
|
---|
4360 | * character in the basename.
|
---|
4361 | */
|
---|
4362 | for (; i >= 0; i--) {
|
---|
4363 | if (dtrace_load8(src + i) == '/')
|
---|
4364 | break;
|
---|
4365 | }
|
---|
4366 |
|
---|
4367 | if (i >= 0)
|
---|
4368 | firstbase = i + 1;
|
---|
4369 |
|
---|
4370 | /*
|
---|
4371 | * Now keep going until we find a non-slash character. That
|
---|
4372 | * character is the last character in the dirname.
|
---|
4373 | */
|
---|
4374 | for (; i >= 0; i--) {
|
---|
4375 | if (dtrace_load8(src + i) != '/')
|
---|
4376 | break;
|
---|
4377 | }
|
---|
4378 |
|
---|
4379 | if (i >= 0)
|
---|
4380 | lastdir = i;
|
---|
4381 |
|
---|
4382 | ASSERT(!(lastbase == -1 && firstbase != -1));
|
---|
4383 | ASSERT(!(firstbase == -1 && lastdir != -1));
|
---|
4384 |
|
---|
4385 | if (lastbase == -1) {
|
---|
4386 | /*
|
---|
4387 | * We didn't find a non-slash character. We know that
|
---|
4388 | * the length is non-zero, so the whole string must be
|
---|
4389 | * slashes. In either the dirname or the basename
|
---|
4390 | * case, we return '/'.
|
---|
4391 | */
|
---|
4392 | ASSERT(firstbase == -1);
|
---|
4393 | firstbase = lastbase = lastdir = 0;
|
---|
4394 | }
|
---|
4395 |
|
---|
4396 | if (firstbase == -1) {
|
---|
4397 | /*
|
---|
4398 | * The entire string consists only of a basename
|
---|
4399 | * component. If we're looking for dirname, we need
|
---|
4400 | * to change our string to be just "."; if we're
|
---|
4401 | * looking for a basename, we'll just set the first
|
---|
4402 | * character of the basename to be 0.
|
---|
4403 | */
|
---|
4404 | if (subr == DIF_SUBR_DIRNAME) {
|
---|
4405 | ASSERT(lastdir == -1);
|
---|
4406 | src = (uintptr_t)".";
|
---|
4407 | lastdir = 0;
|
---|
4408 | } else {
|
---|
4409 | firstbase = 0;
|
---|
4410 | }
|
---|
4411 | }
|
---|
4412 |
|
---|
4413 | if (subr == DIF_SUBR_DIRNAME) {
|
---|
4414 | if (lastdir == -1) {
|
---|
4415 | /*
|
---|
4416 | * We know that we have a slash in the name --
|
---|
4417 | * or lastdir would be set to 0, above. And
|
---|
4418 | * because lastdir is -1, we know that this
|
---|
4419 | * slash must be the first character. (That
|
---|
4420 | * is, the full string must be of the form
|
---|
4421 | * "/basename".) In this case, the last
|
---|
4422 | * character of the directory name is 0.
|
---|
4423 | */
|
---|
4424 | lastdir = 0;
|
---|
4425 | }
|
---|
4426 |
|
---|
4427 | start = 0;
|
---|
4428 | end = lastdir;
|
---|
4429 | } else {
|
---|
4430 | ASSERT(subr == DIF_SUBR_BASENAME);
|
---|
4431 | ASSERT(firstbase != -1 && lastbase != -1);
|
---|
4432 | start = firstbase;
|
---|
4433 | end = lastbase;
|
---|
4434 | }
|
---|
4435 |
|
---|
4436 | for (i = start, j = 0; i <= end && VBDTCAST(unsigned)j < size - 1; i++, j++)
|
---|
4437 | dest[j] = dtrace_load8(src + i);
|
---|
4438 |
|
---|
4439 | dest[j] = '\0';
|
---|
4440 | regs[rd] = (uintptr_t)dest;
|
---|
4441 | mstate->dtms_scratch_ptr += size;
|
---|
4442 | break;
|
---|
4443 | }
|
---|
4444 |
|
---|
4445 | case DIF_SUBR_CLEANPATH: {
|
---|
4446 | char *dest = (char *)mstate->dtms_scratch_ptr, c;
|
---|
4447 | uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
4448 | uintptr_t src = tupregs[0].dttk_value;
|
---|
4449 | int i = 0, j = 0;
|
---|
4450 |
|
---|
4451 | if (!dtrace_strcanload(src, size, mstate, vstate)) {
|
---|
4452 | regs[rd] = NULL;
|
---|
4453 | break;
|
---|
4454 | }
|
---|
4455 |
|
---|
4456 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4457 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4458 | regs[rd] = NULL;
|
---|
4459 | break;
|
---|
4460 | }
|
---|
4461 |
|
---|
4462 | /*
|
---|
4463 | * Move forward, loading each character.
|
---|
4464 | */
|
---|
4465 | do {
|
---|
4466 | c = dtrace_load8(src + i++);
|
---|
4467 | next:
|
---|
4468 | if (j + 5 >= VBDTCAST(int64_t)size) /* 5 = strlen("/..c\0") */
|
---|
4469 | break;
|
---|
4470 |
|
---|
4471 | if (c != '/') {
|
---|
4472 | dest[j++] = c;
|
---|
4473 | continue;
|
---|
4474 | }
|
---|
4475 |
|
---|
4476 | c = dtrace_load8(src + i++);
|
---|
4477 |
|
---|
4478 | if (c == '/') {
|
---|
4479 | /*
|
---|
4480 | * We have two slashes -- we can just advance
|
---|
4481 | * to the next character.
|
---|
4482 | */
|
---|
4483 | goto next;
|
---|
4484 | }
|
---|
4485 |
|
---|
4486 | if (c != '.') {
|
---|
4487 | /*
|
---|
4488 | * This is not "." and it's not ".." -- we can
|
---|
4489 | * just store the "/" and this character and
|
---|
4490 | * drive on.
|
---|
4491 | */
|
---|
4492 | dest[j++] = '/';
|
---|
4493 | dest[j++] = c;
|
---|
4494 | continue;
|
---|
4495 | }
|
---|
4496 |
|
---|
4497 | c = dtrace_load8(src + i++);
|
---|
4498 |
|
---|
4499 | if (c == '/') {
|
---|
4500 | /*
|
---|
4501 | * This is a "/./" component. We're not going
|
---|
4502 | * to store anything in the destination buffer;
|
---|
4503 | * we're just going to go to the next component.
|
---|
4504 | */
|
---|
4505 | goto next;
|
---|
4506 | }
|
---|
4507 |
|
---|
4508 | if (c != '.') {
|
---|
4509 | /*
|
---|
4510 | * This is not ".." -- we can just store the
|
---|
4511 | * "/." and this character and continue
|
---|
4512 | * processing.
|
---|
4513 | */
|
---|
4514 | dest[j++] = '/';
|
---|
4515 | dest[j++] = '.';
|
---|
4516 | dest[j++] = c;
|
---|
4517 | continue;
|
---|
4518 | }
|
---|
4519 |
|
---|
4520 | c = dtrace_load8(src + i++);
|
---|
4521 |
|
---|
4522 | if (c != '/' && c != '\0') {
|
---|
4523 | /*
|
---|
4524 | * This is not ".." -- it's "..[mumble]".
|
---|
4525 | * We'll store the "/.." and this character
|
---|
4526 | * and continue processing.
|
---|
4527 | */
|
---|
4528 | dest[j++] = '/';
|
---|
4529 | dest[j++] = '.';
|
---|
4530 | dest[j++] = '.';
|
---|
4531 | dest[j++] = c;
|
---|
4532 | continue;
|
---|
4533 | }
|
---|
4534 |
|
---|
4535 | /*
|
---|
4536 | * This is "/../" or "/..\0". We need to back up
|
---|
4537 | * our destination pointer until we find a "/".
|
---|
4538 | */
|
---|
4539 | i--;
|
---|
4540 | while (j != 0 && dest[--j] != '/')
|
---|
4541 | continue;
|
---|
4542 |
|
---|
4543 | if (c == '\0')
|
---|
4544 | dest[++j] = '/';
|
---|
4545 | } while (c != '\0');
|
---|
4546 |
|
---|
4547 | dest[j] = '\0';
|
---|
4548 | regs[rd] = (uintptr_t)dest;
|
---|
4549 | mstate->dtms_scratch_ptr += size;
|
---|
4550 | break;
|
---|
4551 | }
|
---|
4552 |
|
---|
4553 | case DIF_SUBR_INET_NTOA:
|
---|
4554 | case DIF_SUBR_INET_NTOA6:
|
---|
4555 | case DIF_SUBR_INET_NTOP: {
|
---|
4556 | #ifndef VBOX
|
---|
4557 | size_t size;
|
---|
4558 | int af, argi, i;
|
---|
4559 | char *base, *end;
|
---|
4560 |
|
---|
4561 | if (subr == DIF_SUBR_INET_NTOP) {
|
---|
4562 | af = (int)tupregs[0].dttk_value;
|
---|
4563 | argi = 1;
|
---|
4564 | } else {
|
---|
4565 | af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
|
---|
4566 | argi = 0;
|
---|
4567 | }
|
---|
4568 |
|
---|
4569 | if (af == AF_INET) {
|
---|
4570 | ipaddr_t ip4;
|
---|
4571 | uint8_t *ptr8, val;
|
---|
4572 |
|
---|
4573 | /*
|
---|
4574 | * Safely load the IPv4 address.
|
---|
4575 | */
|
---|
4576 | ip4 = dtrace_load32(tupregs[argi].dttk_value);
|
---|
4577 |
|
---|
4578 | /*
|
---|
4579 | * Check an IPv4 string will fit in scratch.
|
---|
4580 | */
|
---|
4581 | size = INET_ADDRSTRLEN;
|
---|
4582 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4583 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4584 | regs[rd] = NULL;
|
---|
4585 | break;
|
---|
4586 | }
|
---|
4587 | base = (char *)mstate->dtms_scratch_ptr;
|
---|
4588 | end = (char *)mstate->dtms_scratch_ptr + size - 1;
|
---|
4589 |
|
---|
4590 | /*
|
---|
4591 | * Stringify as a dotted decimal quad.
|
---|
4592 | */
|
---|
4593 | *end-- = '\0';
|
---|
4594 | ptr8 = (uint8_t *)&ip4;
|
---|
4595 | for (i = 3; i >= 0; i--) {
|
---|
4596 | val = ptr8[i];
|
---|
4597 |
|
---|
4598 | if (val == 0) {
|
---|
4599 | *end-- = '0';
|
---|
4600 | } else {
|
---|
4601 | for (; val; val /= 10) {
|
---|
4602 | *end-- = '0' + (val % 10);
|
---|
4603 | }
|
---|
4604 | }
|
---|
4605 |
|
---|
4606 | if (i > 0)
|
---|
4607 | *end-- = '.';
|
---|
4608 | }
|
---|
4609 | ASSERT(end + 1 >= base);
|
---|
4610 |
|
---|
4611 | } else if (af == AF_INET6) {
|
---|
4612 | struct in6_addr ip6;
|
---|
4613 | int firstzero, tryzero, numzero, v6end;
|
---|
4614 | uint16_t val;
|
---|
4615 | const char digits[] = "0123456789abcdef";
|
---|
4616 |
|
---|
4617 | /*
|
---|
4618 | * Stringify using RFC 1884 convention 2 - 16 bit
|
---|
4619 | * hexadecimal values with a zero-run compression.
|
---|
4620 | * Lower case hexadecimal digits are used.
|
---|
4621 | * eg, fe80::214:4fff:fe0b:76c8.
|
---|
4622 | * The IPv4 embedded form is returned for inet_ntop,
|
---|
4623 | * just the IPv4 string is returned for inet_ntoa6.
|
---|
4624 | */
|
---|
4625 |
|
---|
4626 | /*
|
---|
4627 | * Safely load the IPv6 address.
|
---|
4628 | */
|
---|
4629 | dtrace_bcopy(
|
---|
4630 | (void *)(uintptr_t)tupregs[argi].dttk_value,
|
---|
4631 | (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
|
---|
4632 |
|
---|
4633 | /*
|
---|
4634 | * Check an IPv6 string will fit in scratch.
|
---|
4635 | */
|
---|
4636 | size = INET6_ADDRSTRLEN;
|
---|
4637 | if (!DTRACE_INSCRATCH(mstate, size)) {
|
---|
4638 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
4639 | regs[rd] = NULL;
|
---|
4640 | break;
|
---|
4641 | }
|
---|
4642 | base = (char *)mstate->dtms_scratch_ptr;
|
---|
4643 | end = (char *)mstate->dtms_scratch_ptr + size - 1;
|
---|
4644 | *end-- = '\0';
|
---|
4645 |
|
---|
4646 | /*
|
---|
4647 | * Find the longest run of 16 bit zero values
|
---|
4648 | * for the single allowed zero compression - "::".
|
---|
4649 | */
|
---|
4650 | firstzero = -1;
|
---|
4651 | tryzero = -1;
|
---|
4652 | numzero = 1;
|
---|
4653 | for (i = 0; i < sizeof (struct in6_addr); i++) {
|
---|
4654 | if (ip6._S6_un._S6_u8[i] == 0 &&
|
---|
4655 | tryzero == -1 && i % 2 == 0) {
|
---|
4656 | tryzero = i;
|
---|
4657 | continue;
|
---|
4658 | }
|
---|
4659 |
|
---|
4660 | if (tryzero != -1 &&
|
---|
4661 | (ip6._S6_un._S6_u8[i] != 0 ||
|
---|
4662 | i == sizeof (struct in6_addr) - 1)) {
|
---|
4663 |
|
---|
4664 | if (i - tryzero <= numzero) {
|
---|
4665 | tryzero = -1;
|
---|
4666 | continue;
|
---|
4667 | }
|
---|
4668 |
|
---|
4669 | firstzero = tryzero;
|
---|
4670 | numzero = i - i % 2 - tryzero;
|
---|
4671 | tryzero = -1;
|
---|
4672 |
|
---|
4673 | if (ip6._S6_un._S6_u8[i] == 0 &&
|
---|
4674 | i == sizeof (struct in6_addr) - 1)
|
---|
4675 | numzero += 2;
|
---|
4676 | }
|
---|
4677 | }
|
---|
4678 | ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
|
---|
4679 |
|
---|
4680 | /*
|
---|
4681 | * Check for an IPv4 embedded address.
|
---|
4682 | */
|
---|
4683 | v6end = sizeof (struct in6_addr) - 2;
|
---|
4684 | if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
|
---|
4685 | IN6_IS_ADDR_V4COMPAT(&ip6)) {
|
---|
4686 | for (i = sizeof (struct in6_addr) - 1;
|
---|
4687 | i >= DTRACE_V4MAPPED_OFFSET; i--) {
|
---|
4688 | ASSERT(end >= base);
|
---|
4689 |
|
---|
4690 | val = ip6._S6_un._S6_u8[i];
|
---|
4691 |
|
---|
4692 | if (val == 0) {
|
---|
4693 | *end-- = '0';
|
---|
4694 | } else {
|
---|
4695 | for (; val; val /= 10) {
|
---|
4696 | *end-- = '0' + val % 10;
|
---|
4697 | }
|
---|
4698 | }
|
---|
4699 |
|
---|
4700 | if (i > DTRACE_V4MAPPED_OFFSET)
|
---|
4701 | *end-- = '.';
|
---|
4702 | }
|
---|
4703 |
|
---|
4704 | if (subr == DIF_SUBR_INET_NTOA6)
|
---|
4705 | goto inetout;
|
---|
4706 |
|
---|
4707 | /*
|
---|
4708 | * Set v6end to skip the IPv4 address that
|
---|
4709 | * we have already stringified.
|
---|
4710 | */
|
---|
4711 | v6end = 10;
|
---|
4712 | }
|
---|
4713 |
|
---|
4714 | /*
|
---|
4715 | * Build the IPv6 string by working through the
|
---|
4716 | * address in reverse.
|
---|
4717 | */
|
---|
4718 | for (i = v6end; i >= 0; i -= 2) {
|
---|
4719 | ASSERT(end >= base);
|
---|
4720 |
|
---|
4721 | if (i == firstzero + numzero - 2) {
|
---|
4722 | *end-- = ':';
|
---|
4723 | *end-- = ':';
|
---|
4724 | i -= numzero - 2;
|
---|
4725 | continue;
|
---|
4726 | }
|
---|
4727 |
|
---|
4728 | if (i < 14 && i != firstzero - 2)
|
---|
4729 | *end-- = ':';
|
---|
4730 |
|
---|
4731 | val = (ip6._S6_un._S6_u8[i] << 8) +
|
---|
4732 | ip6._S6_un._S6_u8[i + 1];
|
---|
4733 |
|
---|
4734 | if (val == 0) {
|
---|
4735 | *end-- = '0';
|
---|
4736 | } else {
|
---|
4737 | for (; val; val /= 16) {
|
---|
4738 | *end-- = digits[val % 16];
|
---|
4739 | }
|
---|
4740 | }
|
---|
4741 | }
|
---|
4742 | ASSERT(end + 1 >= base);
|
---|
4743 |
|
---|
4744 | } else {
|
---|
4745 | /*
|
---|
4746 | * The user didn't use AH_INET or AH_INET6.
|
---|
4747 | */
|
---|
4748 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4749 | regs[rd] = NULL;
|
---|
4750 | break;
|
---|
4751 | }
|
---|
4752 |
|
---|
4753 | inetout: regs[rd] = (uintptr_t)end + 1;
|
---|
4754 | mstate->dtms_scratch_ptr += size;
|
---|
4755 | #else /* VBOX */
|
---|
4756 | regs[rd] = 0;
|
---|
4757 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
|
---|
4758 | #endif /* VBOX */
|
---|
4759 | break;
|
---|
4760 | }
|
---|
4761 |
|
---|
4762 | }
|
---|
4763 | }
|
---|
4764 |
|
---|
4765 | /*
|
---|
4766 | * Emulate the execution of DTrace IR instructions specified by the given
|
---|
4767 | * DIF object. This function is deliberately void of assertions as all of
|
---|
4768 | * the necessary checks are handled by a call to dtrace_difo_validate().
|
---|
4769 | */
|
---|
4770 | static uint64_t
|
---|
4771 | dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
|
---|
4772 | dtrace_vstate_t *vstate, dtrace_state_t *state)
|
---|
4773 | {
|
---|
4774 | const dif_instr_t *text = difo->dtdo_buf;
|
---|
4775 | const uint_t textlen = difo->dtdo_len;
|
---|
4776 | const char *strtab = difo->dtdo_strtab;
|
---|
4777 | const uint64_t *inttab = difo->dtdo_inttab;
|
---|
4778 |
|
---|
4779 | uint64_t rval = 0;
|
---|
4780 | dtrace_statvar_t *svar;
|
---|
4781 | dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
|
---|
4782 | dtrace_difv_t *v;
|
---|
4783 | volatile uint16_t *flags = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_flags;
|
---|
4784 | volatile uintptr_t *illval = &cpu_core[VBDT_GET_CPUID()].cpuc_dtrace_illval;
|
---|
4785 |
|
---|
4786 | dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
|
---|
4787 | uint64_t regs[DIF_DIR_NREGS];
|
---|
4788 | uint64_t *tmp;
|
---|
4789 |
|
---|
4790 | uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
|
---|
4791 | int64_t cc_r;
|
---|
4792 | uint_t pc = 0, id, opc VBDTUNASS(0);
|
---|
4793 | uint8_t ttop = 0;
|
---|
4794 | dif_instr_t instr;
|
---|
4795 | uint_t r1, r2, rd;
|
---|
4796 |
|
---|
4797 | /*
|
---|
4798 | * We stash the current DIF object into the machine state: we need it
|
---|
4799 | * for subsequent access checking.
|
---|
4800 | */
|
---|
4801 | mstate->dtms_difo = difo;
|
---|
4802 |
|
---|
4803 | regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
|
---|
4804 |
|
---|
4805 | while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
|
---|
4806 | opc = pc;
|
---|
4807 |
|
---|
4808 | instr = text[pc++];
|
---|
4809 | r1 = DIF_INSTR_R1(instr);
|
---|
4810 | r2 = DIF_INSTR_R2(instr);
|
---|
4811 | rd = DIF_INSTR_RD(instr);
|
---|
4812 |
|
---|
4813 | switch (DIF_INSTR_OP(instr)) {
|
---|
4814 | case DIF_OP_OR:
|
---|
4815 | regs[rd] = regs[r1] | regs[r2];
|
---|
4816 | break;
|
---|
4817 | case DIF_OP_XOR:
|
---|
4818 | regs[rd] = regs[r1] ^ regs[r2];
|
---|
4819 | break;
|
---|
4820 | case DIF_OP_AND:
|
---|
4821 | regs[rd] = regs[r1] & regs[r2];
|
---|
4822 | break;
|
---|
4823 | case DIF_OP_SLL:
|
---|
4824 | regs[rd] = regs[r1] << regs[r2];
|
---|
4825 | break;
|
---|
4826 | case DIF_OP_SRL:
|
---|
4827 | regs[rd] = regs[r1] >> regs[r2];
|
---|
4828 | break;
|
---|
4829 | case DIF_OP_SUB:
|
---|
4830 | regs[rd] = regs[r1] - regs[r2];
|
---|
4831 | break;
|
---|
4832 | case DIF_OP_ADD:
|
---|
4833 | regs[rd] = regs[r1] + regs[r2];
|
---|
4834 | break;
|
---|
4835 | case DIF_OP_MUL:
|
---|
4836 | regs[rd] = regs[r1] * regs[r2];
|
---|
4837 | break;
|
---|
4838 | case DIF_OP_SDIV:
|
---|
4839 | if (regs[r2] == 0) {
|
---|
4840 | regs[rd] = 0;
|
---|
4841 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4842 | } else {
|
---|
4843 | regs[rd] = (int64_t)regs[r1] /
|
---|
4844 | (int64_t)regs[r2];
|
---|
4845 | }
|
---|
4846 | break;
|
---|
4847 |
|
---|
4848 | case DIF_OP_UDIV:
|
---|
4849 | if (regs[r2] == 0) {
|
---|
4850 | regs[rd] = 0;
|
---|
4851 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4852 | } else {
|
---|
4853 | regs[rd] = regs[r1] / regs[r2];
|
---|
4854 | }
|
---|
4855 | break;
|
---|
4856 |
|
---|
4857 | case DIF_OP_SREM:
|
---|
4858 | if (regs[r2] == 0) {
|
---|
4859 | regs[rd] = 0;
|
---|
4860 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4861 | } else {
|
---|
4862 | regs[rd] = (int64_t)regs[r1] %
|
---|
4863 | (int64_t)regs[r2];
|
---|
4864 | }
|
---|
4865 | break;
|
---|
4866 |
|
---|
4867 | case DIF_OP_UREM:
|
---|
4868 | if (regs[r2] == 0) {
|
---|
4869 | regs[rd] = 0;
|
---|
4870 | *flags |= CPU_DTRACE_DIVZERO;
|
---|
4871 | } else {
|
---|
4872 | regs[rd] = regs[r1] % regs[r2];
|
---|
4873 | }
|
---|
4874 | break;
|
---|
4875 |
|
---|
4876 | case DIF_OP_NOT:
|
---|
4877 | regs[rd] = ~regs[r1];
|
---|
4878 | break;
|
---|
4879 | case DIF_OP_MOV:
|
---|
4880 | regs[rd] = regs[r1];
|
---|
4881 | break;
|
---|
4882 | case DIF_OP_CMP:
|
---|
4883 | cc_r = regs[r1] - regs[r2];
|
---|
4884 | cc_n = cc_r < 0;
|
---|
4885 | cc_z = cc_r == 0;
|
---|
4886 | cc_v = 0;
|
---|
4887 | cc_c = regs[r1] < regs[r2];
|
---|
4888 | break;
|
---|
4889 | case DIF_OP_TST:
|
---|
4890 | cc_n = cc_v = cc_c = 0;
|
---|
4891 | cc_z = regs[r1] == 0;
|
---|
4892 | break;
|
---|
4893 | case DIF_OP_BA:
|
---|
4894 | pc = DIF_INSTR_LABEL(instr);
|
---|
4895 | break;
|
---|
4896 | case DIF_OP_BE:
|
---|
4897 | if (cc_z)
|
---|
4898 | pc = DIF_INSTR_LABEL(instr);
|
---|
4899 | break;
|
---|
4900 | case DIF_OP_BNE:
|
---|
4901 | if (cc_z == 0)
|
---|
4902 | pc = DIF_INSTR_LABEL(instr);
|
---|
4903 | break;
|
---|
4904 | case DIF_OP_BG:
|
---|
4905 | if ((cc_z | (cc_n ^ cc_v)) == 0)
|
---|
4906 | pc = DIF_INSTR_LABEL(instr);
|
---|
4907 | break;
|
---|
4908 | case DIF_OP_BGU:
|
---|
4909 | if ((cc_c | cc_z) == 0)
|
---|
4910 | pc = DIF_INSTR_LABEL(instr);
|
---|
4911 | break;
|
---|
4912 | case DIF_OP_BGE:
|
---|
4913 | if ((cc_n ^ cc_v) == 0)
|
---|
4914 | pc = DIF_INSTR_LABEL(instr);
|
---|
4915 | break;
|
---|
4916 | case DIF_OP_BGEU:
|
---|
4917 | if (cc_c == 0)
|
---|
4918 | pc = DIF_INSTR_LABEL(instr);
|
---|
4919 | break;
|
---|
4920 | case DIF_OP_BL:
|
---|
4921 | if (cc_n ^ cc_v)
|
---|
4922 | pc = DIF_INSTR_LABEL(instr);
|
---|
4923 | break;
|
---|
4924 | case DIF_OP_BLU:
|
---|
4925 | if (cc_c)
|
---|
4926 | pc = DIF_INSTR_LABEL(instr);
|
---|
4927 | break;
|
---|
4928 | case DIF_OP_BLE:
|
---|
4929 | if (cc_z | (cc_n ^ cc_v))
|
---|
4930 | pc = DIF_INSTR_LABEL(instr);
|
---|
4931 | break;
|
---|
4932 | case DIF_OP_BLEU:
|
---|
4933 | if (cc_c | cc_z)
|
---|
4934 | pc = DIF_INSTR_LABEL(instr);
|
---|
4935 | break;
|
---|
4936 | case DIF_OP_RLDSB:
|
---|
4937 | if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
|
---|
4938 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4939 | *illval = regs[r1];
|
---|
4940 | break;
|
---|
4941 | }
|
---|
4942 | RT_FALL_THRU();
|
---|
4943 | case DIF_OP_LDSB:
|
---|
4944 | regs[rd] = (int8_t)dtrace_load8(regs[r1]);
|
---|
4945 | break;
|
---|
4946 | case DIF_OP_RLDSH:
|
---|
4947 | if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
|
---|
4948 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4949 | *illval = regs[r1];
|
---|
4950 | break;
|
---|
4951 | }
|
---|
4952 | RT_FALL_THRU();
|
---|
4953 | case DIF_OP_LDSH:
|
---|
4954 | regs[rd] = (int16_t)dtrace_load16(regs[r1]);
|
---|
4955 | break;
|
---|
4956 | case DIF_OP_RLDSW:
|
---|
4957 | if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
|
---|
4958 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4959 | *illval = regs[r1];
|
---|
4960 | break;
|
---|
4961 | }
|
---|
4962 | RT_FALL_THRU();
|
---|
4963 | case DIF_OP_LDSW:
|
---|
4964 | regs[rd] = (int32_t)dtrace_load32(regs[r1]);
|
---|
4965 | break;
|
---|
4966 | case DIF_OP_RLDUB:
|
---|
4967 | if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
|
---|
4968 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4969 | *illval = regs[r1];
|
---|
4970 | break;
|
---|
4971 | }
|
---|
4972 | RT_FALL_THRU();
|
---|
4973 | case DIF_OP_LDUB:
|
---|
4974 | regs[rd] = dtrace_load8(regs[r1]);
|
---|
4975 | break;
|
---|
4976 | case DIF_OP_RLDUH:
|
---|
4977 | if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
|
---|
4978 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4979 | *illval = regs[r1];
|
---|
4980 | break;
|
---|
4981 | }
|
---|
4982 | RT_FALL_THRU();
|
---|
4983 | case DIF_OP_LDUH:
|
---|
4984 | regs[rd] = dtrace_load16(regs[r1]);
|
---|
4985 | break;
|
---|
4986 | case DIF_OP_RLDUW:
|
---|
4987 | if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
|
---|
4988 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4989 | *illval = regs[r1];
|
---|
4990 | break;
|
---|
4991 | }
|
---|
4992 | RT_FALL_THRU();
|
---|
4993 | case DIF_OP_LDUW:
|
---|
4994 | regs[rd] = dtrace_load32(regs[r1]);
|
---|
4995 | break;
|
---|
4996 | case DIF_OP_RLDX:
|
---|
4997 | if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
|
---|
4998 | *flags |= CPU_DTRACE_KPRIV;
|
---|
4999 | *illval = regs[r1];
|
---|
5000 | break;
|
---|
5001 | }
|
---|
5002 | RT_FALL_THRU();
|
---|
5003 | case DIF_OP_LDX:
|
---|
5004 | regs[rd] = dtrace_load64(regs[r1]);
|
---|
5005 | break;
|
---|
5006 | case DIF_OP_ULDSB:
|
---|
5007 | regs[rd] = (int8_t)
|
---|
5008 | dtrace_fuword8((void *)(uintptr_t)regs[r1]);
|
---|
5009 | break;
|
---|
5010 | case DIF_OP_ULDSH:
|
---|
5011 | regs[rd] = (int16_t)
|
---|
5012 | dtrace_fuword16((void *)(uintptr_t)regs[r1]);
|
---|
5013 | break;
|
---|
5014 | case DIF_OP_ULDSW:
|
---|
5015 | regs[rd] = (int32_t)
|
---|
5016 | dtrace_fuword32((void *)(uintptr_t)regs[r1]);
|
---|
5017 | break;
|
---|
5018 | case DIF_OP_ULDUB:
|
---|
5019 | regs[rd] =
|
---|
5020 | dtrace_fuword8((void *)(uintptr_t)regs[r1]);
|
---|
5021 | break;
|
---|
5022 | case DIF_OP_ULDUH:
|
---|
5023 | regs[rd] =
|
---|
5024 | dtrace_fuword16((void *)(uintptr_t)regs[r1]);
|
---|
5025 | break;
|
---|
5026 | case DIF_OP_ULDUW:
|
---|
5027 | regs[rd] =
|
---|
5028 | dtrace_fuword32((void *)(uintptr_t)regs[r1]);
|
---|
5029 | break;
|
---|
5030 | case DIF_OP_ULDX:
|
---|
5031 | regs[rd] =
|
---|
5032 | dtrace_fuword64((void *)(uintptr_t)regs[r1]);
|
---|
5033 | break;
|
---|
5034 | case DIF_OP_RET:
|
---|
5035 | rval = regs[rd];
|
---|
5036 | pc = textlen;
|
---|
5037 | break;
|
---|
5038 | case DIF_OP_NOP:
|
---|
5039 | break;
|
---|
5040 | case DIF_OP_SETX:
|
---|
5041 | regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
|
---|
5042 | break;
|
---|
5043 | case DIF_OP_SETS:
|
---|
5044 | regs[rd] = (uint64_t)(uintptr_t)
|
---|
5045 | (strtab + DIF_INSTR_STRING(instr));
|
---|
5046 | break;
|
---|
5047 | case DIF_OP_SCMP: {
|
---|
5048 | size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
|
---|
5049 | uintptr_t s1 = regs[r1];
|
---|
5050 | uintptr_t s2 = regs[r2];
|
---|
5051 |
|
---|
5052 | if (s1 != NULL &&
|
---|
5053 | !dtrace_strcanload(s1, sz, mstate, vstate))
|
---|
5054 | break;
|
---|
5055 | if (s2 != NULL &&
|
---|
5056 | !dtrace_strcanload(s2, sz, mstate, vstate))
|
---|
5057 | break;
|
---|
5058 |
|
---|
5059 | cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);
|
---|
5060 |
|
---|
5061 | cc_n = cc_r < 0;
|
---|
5062 | cc_z = cc_r == 0;
|
---|
5063 | cc_v = cc_c = 0;
|
---|
5064 | break;
|
---|
5065 | }
|
---|
5066 | case DIF_OP_LDGA:
|
---|
5067 | regs[rd] = dtrace_dif_variable(mstate, state,
|
---|
5068 | r1, regs[r2]);
|
---|
5069 | break;
|
---|
5070 | case DIF_OP_LDGS:
|
---|
5071 | id = DIF_INSTR_VAR(instr);
|
---|
5072 |
|
---|
5073 | if (id >= DIF_VAR_OTHER_UBASE) {
|
---|
5074 | uintptr_t a;
|
---|
5075 |
|
---|
5076 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5077 | svar = vstate->dtvs_globals[id];
|
---|
5078 | ASSERT(svar != NULL);
|
---|
5079 | v = &svar->dtsv_var;
|
---|
5080 |
|
---|
5081 | if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
|
---|
5082 | regs[rd] = svar->dtsv_data;
|
---|
5083 | break;
|
---|
5084 | }
|
---|
5085 |
|
---|
5086 | a = (uintptr_t)svar->dtsv_data;
|
---|
5087 |
|
---|
5088 | if (*(uint8_t *)a == UINT8_MAX) {
|
---|
5089 | /*
|
---|
5090 | * If the 0th byte is set to UINT8_MAX
|
---|
5091 | * then this is to be treated as a
|
---|
5092 | * reference to a NULL variable.
|
---|
5093 | */
|
---|
5094 | regs[rd] = NULL;
|
---|
5095 | } else {
|
---|
5096 | regs[rd] = a + sizeof (uint64_t);
|
---|
5097 | }
|
---|
5098 |
|
---|
5099 | break;
|
---|
5100 | }
|
---|
5101 |
|
---|
5102 | regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
|
---|
5103 | break;
|
---|
5104 |
|
---|
5105 | case DIF_OP_STGS:
|
---|
5106 | id = DIF_INSTR_VAR(instr);
|
---|
5107 |
|
---|
5108 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5109 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5110 |
|
---|
5111 | svar = vstate->dtvs_globals[id];
|
---|
5112 | ASSERT(svar != NULL);
|
---|
5113 | v = &svar->dtsv_var;
|
---|
5114 |
|
---|
5115 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5116 | uintptr_t a = (uintptr_t)svar->dtsv_data;
|
---|
5117 |
|
---|
5118 | ASSERT(a != NULL);
|
---|
5119 | ASSERT(svar->dtsv_size != 0);
|
---|
5120 |
|
---|
5121 | if (regs[rd] == NULL) {
|
---|
5122 | *(uint8_t *)a = UINT8_MAX;
|
---|
5123 | break;
|
---|
5124 | } else {
|
---|
5125 | *(uint8_t *)a = 0;
|
---|
5126 | a += sizeof (uint64_t);
|
---|
5127 | }
|
---|
5128 | if (!dtrace_vcanload(
|
---|
5129 | (void *)(uintptr_t)regs[rd], &v->dtdv_type,
|
---|
5130 | mstate, vstate))
|
---|
5131 | break;
|
---|
5132 |
|
---|
5133 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5134 | (void *)a, &v->dtdv_type);
|
---|
5135 | break;
|
---|
5136 | }
|
---|
5137 |
|
---|
5138 | svar->dtsv_data = regs[rd];
|
---|
5139 | break;
|
---|
5140 |
|
---|
5141 | case DIF_OP_LDTA:
|
---|
5142 | /*
|
---|
5143 | * There are no DTrace built-in thread-local arrays at
|
---|
5144 | * present. This opcode is saved for future work.
|
---|
5145 | */
|
---|
5146 | *flags |= CPU_DTRACE_ILLOP;
|
---|
5147 | regs[rd] = 0;
|
---|
5148 | break;
|
---|
5149 |
|
---|
5150 | case DIF_OP_LDLS:
|
---|
5151 | id = DIF_INSTR_VAR(instr);
|
---|
5152 |
|
---|
5153 | if (id < DIF_VAR_OTHER_UBASE) {
|
---|
5154 | /*
|
---|
5155 | * For now, this has no meaning.
|
---|
5156 | */
|
---|
5157 | regs[rd] = 0;
|
---|
5158 | break;
|
---|
5159 | }
|
---|
5160 |
|
---|
5161 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5162 |
|
---|
5163 | ASSERT(VBDTCAST(int64_t)id < vstate->dtvs_nlocals);
|
---|
5164 | ASSERT(vstate->dtvs_locals != NULL);
|
---|
5165 |
|
---|
5166 | svar = vstate->dtvs_locals[id];
|
---|
5167 | ASSERT(svar != NULL);
|
---|
5168 | v = &svar->dtsv_var;
|
---|
5169 |
|
---|
5170 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5171 | uintptr_t a = (uintptr_t)svar->dtsv_data;
|
---|
5172 | size_t sz = v->dtdv_type.dtdt_size;
|
---|
5173 |
|
---|
5174 | sz += sizeof (uint64_t);
|
---|
5175 | ASSERT(svar->dtsv_size == NCPU * sz);
|
---|
5176 | a += VBDT_GET_CPUID() * sz;
|
---|
5177 |
|
---|
5178 | if (*(uint8_t *)a == UINT8_MAX) {
|
---|
5179 | /*
|
---|
5180 | * If the 0th byte is set to UINT8_MAX
|
---|
5181 | * then this is to be treated as a
|
---|
5182 | * reference to a NULL variable.
|
---|
5183 | */
|
---|
5184 | regs[rd] = NULL;
|
---|
5185 | } else {
|
---|
5186 | regs[rd] = a + sizeof (uint64_t);
|
---|
5187 | }
|
---|
5188 |
|
---|
5189 | break;
|
---|
5190 | }
|
---|
5191 |
|
---|
5192 | ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
|
---|
5193 | tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
|
---|
5194 | regs[rd] = tmp[VBDT_GET_CPUID()];
|
---|
5195 | break;
|
---|
5196 |
|
---|
5197 | case DIF_OP_STLS:
|
---|
5198 | id = DIF_INSTR_VAR(instr);
|
---|
5199 |
|
---|
5200 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5201 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5202 | ASSERT(VBDTCAST(int64_t)id < vstate->dtvs_nlocals);
|
---|
5203 |
|
---|
5204 | ASSERT(vstate->dtvs_locals != NULL);
|
---|
5205 | svar = vstate->dtvs_locals[id];
|
---|
5206 | ASSERT(svar != NULL);
|
---|
5207 | v = &svar->dtsv_var;
|
---|
5208 |
|
---|
5209 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5210 | uintptr_t a = (uintptr_t)svar->dtsv_data;
|
---|
5211 | size_t sz = v->dtdv_type.dtdt_size;
|
---|
5212 |
|
---|
5213 | sz += sizeof (uint64_t);
|
---|
5214 | ASSERT(svar->dtsv_size == NCPU * sz);
|
---|
5215 | a += VBDT_GET_CPUID() * sz;
|
---|
5216 |
|
---|
5217 | if (regs[rd] == NULL) {
|
---|
5218 | *(uint8_t *)a = UINT8_MAX;
|
---|
5219 | break;
|
---|
5220 | } else {
|
---|
5221 | *(uint8_t *)a = 0;
|
---|
5222 | a += sizeof (uint64_t);
|
---|
5223 | }
|
---|
5224 |
|
---|
5225 | if (!dtrace_vcanload(
|
---|
5226 | (void *)(uintptr_t)regs[rd], &v->dtdv_type,
|
---|
5227 | mstate, vstate))
|
---|
5228 | break;
|
---|
5229 |
|
---|
5230 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5231 | (void *)a, &v->dtdv_type);
|
---|
5232 | break;
|
---|
5233 | }
|
---|
5234 |
|
---|
5235 | ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t));
|
---|
5236 | tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
|
---|
5237 | tmp[VBDT_GET_CPUID()] = regs[rd];
|
---|
5238 | break;
|
---|
5239 |
|
---|
5240 | case DIF_OP_LDTS: {
|
---|
5241 | dtrace_dynvar_t *dvar;
|
---|
5242 | dtrace_key_t *key;
|
---|
5243 |
|
---|
5244 | id = DIF_INSTR_VAR(instr);
|
---|
5245 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5246 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5247 | v = &vstate->dtvs_tlocals[id];
|
---|
5248 |
|
---|
5249 | key = &tupregs[DIF_DTR_NREGS];
|
---|
5250 | key[0].dttk_value = (uint64_t)id;
|
---|
5251 | key[0].dttk_size = 0;
|
---|
5252 | DTRACE_TLS_THRKEY(key[1].dttk_value);
|
---|
5253 | key[1].dttk_size = 0;
|
---|
5254 |
|
---|
5255 | dvar = dtrace_dynvar(dstate, 2, key,
|
---|
5256 | sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
|
---|
5257 | mstate, vstate);
|
---|
5258 |
|
---|
5259 | if (dvar == NULL) {
|
---|
5260 | regs[rd] = 0;
|
---|
5261 | break;
|
---|
5262 | }
|
---|
5263 |
|
---|
5264 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5265 | regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
|
---|
5266 | } else {
|
---|
5267 | regs[rd] = *((uint64_t *)dvar->dtdv_data);
|
---|
5268 | }
|
---|
5269 |
|
---|
5270 | break;
|
---|
5271 | }
|
---|
5272 |
|
---|
5273 | case DIF_OP_STTS: {
|
---|
5274 | dtrace_dynvar_t *dvar;
|
---|
5275 | dtrace_key_t *key;
|
---|
5276 |
|
---|
5277 | id = DIF_INSTR_VAR(instr);
|
---|
5278 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5279 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5280 |
|
---|
5281 | key = &tupregs[DIF_DTR_NREGS];
|
---|
5282 | key[0].dttk_value = (uint64_t)id;
|
---|
5283 | key[0].dttk_size = 0;
|
---|
5284 | DTRACE_TLS_THRKEY(key[1].dttk_value);
|
---|
5285 | key[1].dttk_size = 0;
|
---|
5286 | v = &vstate->dtvs_tlocals[id];
|
---|
5287 |
|
---|
5288 | dvar = dtrace_dynvar(dstate, 2, key,
|
---|
5289 | v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
|
---|
5290 | v->dtdv_type.dtdt_size : sizeof (uint64_t),
|
---|
5291 | regs[rd] ? DTRACE_DYNVAR_ALLOC :
|
---|
5292 | DTRACE_DYNVAR_DEALLOC, mstate, vstate);
|
---|
5293 |
|
---|
5294 | /*
|
---|
5295 | * Given that we're storing to thread-local data,
|
---|
5296 | * we need to flush our predicate cache.
|
---|
5297 | */
|
---|
5298 | curthread->t_predcache = NULL;
|
---|
5299 |
|
---|
5300 | if (dvar == NULL)
|
---|
5301 | break;
|
---|
5302 |
|
---|
5303 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5304 | if (!dtrace_vcanload(
|
---|
5305 | (void *)(uintptr_t)regs[rd],
|
---|
5306 | &v->dtdv_type, mstate, vstate))
|
---|
5307 | break;
|
---|
5308 |
|
---|
5309 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5310 | dvar->dtdv_data, &v->dtdv_type);
|
---|
5311 | } else {
|
---|
5312 | *((uint64_t *)dvar->dtdv_data) = regs[rd];
|
---|
5313 | }
|
---|
5314 |
|
---|
5315 | break;
|
---|
5316 | }
|
---|
5317 |
|
---|
5318 | case DIF_OP_SRA:
|
---|
5319 | regs[rd] = (int64_t)regs[r1] >> regs[r2];
|
---|
5320 | break;
|
---|
5321 |
|
---|
5322 | case DIF_OP_CALL:
|
---|
5323 | dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
|
---|
5324 | regs, tupregs, ttop, mstate, state);
|
---|
5325 | break;
|
---|
5326 |
|
---|
5327 | case DIF_OP_PUSHTR:
|
---|
5328 | if (ttop == DIF_DTR_NREGS) {
|
---|
5329 | *flags |= CPU_DTRACE_TUPOFLOW;
|
---|
5330 | break;
|
---|
5331 | }
|
---|
5332 |
|
---|
5333 | if (r1 == DIF_TYPE_STRING) {
|
---|
5334 | /*
|
---|
5335 | * If this is a string type and the size is 0,
|
---|
5336 | * we'll use the system-wide default string
|
---|
5337 | * size. Note that we are _not_ looking at
|
---|
5338 | * the value of the DTRACEOPT_STRSIZE option;
|
---|
5339 | * had this been set, we would expect to have
|
---|
5340 | * a non-zero size value in the "pushtr".
|
---|
5341 | */
|
---|
5342 | tupregs[ttop].dttk_size =
|
---|
5343 | dtrace_strlen((char *)(uintptr_t)regs[rd],
|
---|
5344 | regs[r2] ? regs[r2] :
|
---|
5345 | dtrace_strsize_default) + 1;
|
---|
5346 | } else {
|
---|
5347 | tupregs[ttop].dttk_size = regs[r2];
|
---|
5348 | }
|
---|
5349 |
|
---|
5350 | tupregs[ttop++].dttk_value = regs[rd];
|
---|
5351 | break;
|
---|
5352 |
|
---|
5353 | case DIF_OP_PUSHTV:
|
---|
5354 | if (ttop == DIF_DTR_NREGS) {
|
---|
5355 | *flags |= CPU_DTRACE_TUPOFLOW;
|
---|
5356 | break;
|
---|
5357 | }
|
---|
5358 |
|
---|
5359 | tupregs[ttop].dttk_value = regs[rd];
|
---|
5360 | tupregs[ttop++].dttk_size = 0;
|
---|
5361 | break;
|
---|
5362 |
|
---|
5363 | case DIF_OP_POPTS:
|
---|
5364 | if (ttop != 0)
|
---|
5365 | ttop--;
|
---|
5366 | break;
|
---|
5367 |
|
---|
5368 | case DIF_OP_FLUSHTS:
|
---|
5369 | ttop = 0;
|
---|
5370 | break;
|
---|
5371 |
|
---|
5372 | case DIF_OP_LDGAA:
|
---|
5373 | case DIF_OP_LDTAA: {
|
---|
5374 | dtrace_dynvar_t *dvar;
|
---|
5375 | dtrace_key_t *key = tupregs;
|
---|
5376 | uint_t nkeys = ttop;
|
---|
5377 |
|
---|
5378 | id = DIF_INSTR_VAR(instr);
|
---|
5379 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5380 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5381 |
|
---|
5382 | key[nkeys].dttk_value = (uint64_t)id;
|
---|
5383 | key[nkeys++].dttk_size = 0;
|
---|
5384 |
|
---|
5385 | if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
|
---|
5386 | DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
|
---|
5387 | key[nkeys++].dttk_size = 0;
|
---|
5388 | v = &vstate->dtvs_tlocals[id];
|
---|
5389 | } else {
|
---|
5390 | v = &vstate->dtvs_globals[id]->dtsv_var;
|
---|
5391 | }
|
---|
5392 |
|
---|
5393 | dvar = dtrace_dynvar(dstate, nkeys, key,
|
---|
5394 | v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
|
---|
5395 | v->dtdv_type.dtdt_size : sizeof (uint64_t),
|
---|
5396 | DTRACE_DYNVAR_NOALLOC, mstate, vstate);
|
---|
5397 |
|
---|
5398 | if (dvar == NULL) {
|
---|
5399 | regs[rd] = 0;
|
---|
5400 | break;
|
---|
5401 | }
|
---|
5402 |
|
---|
5403 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5404 | regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
|
---|
5405 | } else {
|
---|
5406 | regs[rd] = *((uint64_t *)dvar->dtdv_data);
|
---|
5407 | }
|
---|
5408 |
|
---|
5409 | break;
|
---|
5410 | }
|
---|
5411 |
|
---|
5412 | case DIF_OP_STGAA:
|
---|
5413 | case DIF_OP_STTAA: {
|
---|
5414 | dtrace_dynvar_t *dvar;
|
---|
5415 | dtrace_key_t *key = tupregs;
|
---|
5416 | uint_t nkeys = ttop;
|
---|
5417 |
|
---|
5418 | id = DIF_INSTR_VAR(instr);
|
---|
5419 | ASSERT(id >= DIF_VAR_OTHER_UBASE);
|
---|
5420 | id -= DIF_VAR_OTHER_UBASE;
|
---|
5421 |
|
---|
5422 | key[nkeys].dttk_value = (uint64_t)id;
|
---|
5423 | key[nkeys++].dttk_size = 0;
|
---|
5424 |
|
---|
5425 | if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
|
---|
5426 | DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
|
---|
5427 | key[nkeys++].dttk_size = 0;
|
---|
5428 | v = &vstate->dtvs_tlocals[id];
|
---|
5429 | } else {
|
---|
5430 | v = &vstate->dtvs_globals[id]->dtsv_var;
|
---|
5431 | }
|
---|
5432 |
|
---|
5433 | dvar = dtrace_dynvar(dstate, nkeys, key,
|
---|
5434 | v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
|
---|
5435 | v->dtdv_type.dtdt_size : sizeof (uint64_t),
|
---|
5436 | regs[rd] ? DTRACE_DYNVAR_ALLOC :
|
---|
5437 | DTRACE_DYNVAR_DEALLOC, mstate, vstate);
|
---|
5438 |
|
---|
5439 | if (dvar == NULL)
|
---|
5440 | break;
|
---|
5441 |
|
---|
5442 | if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
|
---|
5443 | if (!dtrace_vcanload(
|
---|
5444 | (void *)(uintptr_t)regs[rd], &v->dtdv_type,
|
---|
5445 | mstate, vstate))
|
---|
5446 | break;
|
---|
5447 |
|
---|
5448 | dtrace_vcopy((void *)(uintptr_t)regs[rd],
|
---|
5449 | dvar->dtdv_data, &v->dtdv_type);
|
---|
5450 | } else {
|
---|
5451 | *((uint64_t *)dvar->dtdv_data) = regs[rd];
|
---|
5452 | }
|
---|
5453 |
|
---|
5454 | break;
|
---|
5455 | }
|
---|
5456 |
|
---|
5457 | case DIF_OP_ALLOCS: {
|
---|
5458 | uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
|
---|
5459 | size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
|
---|
5460 |
|
---|
5461 | /*
|
---|
5462 | * Rounding up the user allocation size could have
|
---|
5463 | * overflowed large, bogus allocations (like -1ULL) to
|
---|
5464 | * 0.
|
---|
5465 | */
|
---|
5466 | if (size < regs[r1] ||
|
---|
5467 | !DTRACE_INSCRATCH(mstate, size)) {
|
---|
5468 | DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
|
---|
5469 | regs[rd] = NULL;
|
---|
5470 | break;
|
---|
5471 | }
|
---|
5472 |
|
---|
5473 | dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
|
---|
5474 | mstate->dtms_scratch_ptr += size;
|
---|
5475 | regs[rd] = ptr;
|
---|
5476 | break;
|
---|
5477 | }
|
---|
5478 |
|
---|
5479 | case DIF_OP_COPYS:
|
---|
5480 | if (!dtrace_canstore(regs[rd], regs[r2],
|
---|
5481 | mstate, vstate)) {
|
---|
5482 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5483 | *illval = regs[rd];
|
---|
5484 | break;
|
---|
5485 | }
|
---|
5486 |
|
---|
5487 | if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
|
---|
5488 | break;
|
---|
5489 |
|
---|
5490 | dtrace_bcopy((void *)(uintptr_t)regs[r1],
|
---|
5491 | (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
|
---|
5492 | break;
|
---|
5493 |
|
---|
5494 | case DIF_OP_STB:
|
---|
5495 | if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
|
---|
5496 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5497 | *illval = regs[rd];
|
---|
5498 | break;
|
---|
5499 | }
|
---|
5500 | *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
|
---|
5501 | break;
|
---|
5502 |
|
---|
5503 | case DIF_OP_STH:
|
---|
5504 | if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
|
---|
5505 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5506 | *illval = regs[rd];
|
---|
5507 | break;
|
---|
5508 | }
|
---|
5509 | if (regs[rd] & 1) {
|
---|
5510 | *flags |= CPU_DTRACE_BADALIGN;
|
---|
5511 | *illval = regs[rd];
|
---|
5512 | break;
|
---|
5513 | }
|
---|
5514 | *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
|
---|
5515 | break;
|
---|
5516 |
|
---|
5517 | case DIF_OP_STW:
|
---|
5518 | if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
|
---|
5519 | *flags |= CPU_DTRACE_BADADDR;
|
---|
5520 | *illval = regs[rd];
|
---|
5521 | break;
|
---|
5522 | }
|
---|
5523 | if (regs[rd] & 3) {
|
---|
5524 | *flags |= CPU_DTRACE_BADALIGN;
|
---|
5525 | *illval = regs[rd];
|
---|
5526 | break;
|
---|
5527 | }
|
---|
5528 | *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1 |
---|