VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/path/RTPathGlob.cpp@ 62564

Last change on this file since 62564 was 62564, checked in by vboxsync, 9 years ago

IPRT: Mark unused parameters.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 83.7 KB
Line 
1/* $Id: RTPathGlob.cpp 62564 2016-07-26 14:43:03Z vboxsync $ */
2/** @file
3 * IPRT - RTPathGlob
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#include "internal/iprt.h"
32#include <iprt/path.h>
33
34#include <iprt/asm.h>
35#include <iprt/assert.h>
36#include <iprt/buildconfig.h>
37#include <iprt/ctype.h>
38#include <iprt/dir.h>
39#include <iprt/env.h>
40#include <iprt/err.h>
41#include <iprt/mem.h>
42#include <iprt/string.h>
43#include <iprt/uni.h>
44
45#if defined(RT_OS_WINDOWS)
46# include <Windows.h>
47# include "../../r3/win/internal-r3-win.h"
48
49#elif defined(RT_OS_OS2)
50# define INCL_BASE
51# include <os2.h>
52# undef RT_MAX /* collision */
53
54#endif
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
/** Upper limit on the number of results. */
#define RTPATHGLOB_MAX_RESULTS          _32K
/** Upper limit on the number of zero-or-more wildcards in one pattern.
 * Keeping this bounded limits stack usage, recursion depth and execution time. */
#define RTPATHMATCH_MAX_ZERO_OR_MORE    24
/** Upper limit on the number of value items a variable is queried for. */
#define RTPATHMATCH_MAX_VAR_ITEMS       _4K
67
68
69
70/*********************************************************************************************************************************
71* Structures and Typedefs *
72*********************************************************************************************************************************/
/**
 * Opcodes of the matching program instructions.
 */
typedef enum RTPATHMATCHOP
{
    /** Invalid zero value. */
    RTPATHMATCHOP_INVALID = 0,

    /** EOS: Match if the input is exhausted. */
    RTPATHMATCHOP_RETURN_MATCH_IF_AT_END,
    /** Trailing asterisk: Unconditional match. */
    RTPATHMATCHOP_RETURN_MATCH,
    /** Lone asterisk: Match, except when the input is '.' or '..'. */
    RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT,

    /** Plain text: Case sensitive compare. */
    RTPATHMATCHOP_STRCMP,
    /** Plain text: Case insensitive compare. */
    RTPATHMATCHOP_STRICMP,

    /** Question mark: Skip exactly one code point. */
    RTPATHMATCHOP_SKIP_ONE_CODEPOINT,
    /** Question marks: Skip exactly RTPATHMATCHCORE::cch code points. */
    RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS,

    /** Char set: The next code point must be in the ASCII-7 set given by
     * RTPATHMATCHCORE::pch and RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7,
    /** Char set: The next code point must not be in the ASCII-7 set given by
     * RTPATHMATCHCORE::pch and RTPATHMATCHCORE::cch.  No ranges. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7,
    /** Char set: The next code point must be in the extended set given by
     * RTPATHMATCHCORE::pch and RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED,
    /** Char set: The next code point must not be in the extended set given by
     * RTPATHMATCHCORE::pch and RTPATHMATCHCORE::cch.  Ranges, UTF-8. */
    RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED,

    /** Variable: Case sensitive compare with the variable value;
     * RTPATHMATCHCORE::uOp2 holds the variable table index. */
    RTPATHMATCHOP_VARIABLE_VALUE_CMP,
    /** Variable: Case insensitive compare with the variable value;
     * RTPATHMATCHCORE::uOp2 holds the variable table index. */
    RTPATHMATCHOP_VARIABLE_VALUE_ICMP,

    /** Asterisk: Match zero or more code points, leaving at least
     * RTPATHMATCHCORE::cch code points after it. */
    RTPATHMATCHOP_ZERO_OR_MORE,
    /** Asterisk: Like RTPATHMATCHOP_ZERO_OR_MORE, but never matching the '.'
     * and '..' entries. */
    RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT,

    /** End of valid operations. */
    RTPATHMATCHOP_END
} RTPATHMATCHOP;
120
121/**
122 * Matching instruction.
123 */
124typedef struct RTPATHMATCHCORE
125{
126 /** The action to take. */
127 RTPATHMATCHOP enmOpCode;
128 /** Generic value operand. */
129 uint16_t uOp2;
130 /** Generic length operand. */
131 uint16_t cch;
132 /** Generic string pointer operand. */
133 const char *pch;
134} RTPATHMATCHCORE;
135/** Pointer to a matching instruction. */
136typedef RTPATHMATCHCORE *PRTPATHMATCHCORE;
137/** Pointer to a const matching instruction. */
138typedef RTPATHMATCHCORE const *PCRTPATHMATCHCORE;
139
140/**
141 * Path matching instruction allocator.
142 */
143typedef struct RTPATHMATCHALLOC
144{
145 /** Allocated array of instructions. */
146 PRTPATHMATCHCORE paInstructions;
147 /** Index of the next free entry in paScratch. */
148 uint32_t iNext;
149 /** Number of instructions allocated. */
150 uint32_t cAllocated;
151} RTPATHMATCHALLOC;
152/** Pointer to a matching instruction allocator. */
153typedef RTPATHMATCHALLOC *PRTPATHMATCHALLOC;
154
/**
 * Path matching cache, mainly meant for variables like the PATH.
 *
 * Currently only a placeholder; nothing is cached yet.
 */
typedef struct RTPATHMATCHCACHE
{
    /** Placeholder member.  @todo optimize later. */
    uint32_t            iNothingYet;
} RTPATHMATCHCACHE;
/** Pointer to a path matching cache. */
typedef RTPATHMATCHCACHE *PRTPATHMATCHCACHE;
165
166
167
/**
 * Parsed path entry, one per pattern component.
 */
typedef struct RTPATHGLOBPPE
{
    /** Normal: Index into RTPATHGLOB::MatchInstrAlloc.paInstructions. */
    uint32_t    iMatchProg      : 16;
    /** Set if this is a normal entry that is matched using iMatchProg. */
    uint32_t    fNormal         : 1;
    /** !fNormal: Plain name that can be handled without enumerating the whole
     * directory, unless the file system is case sensitive and the globbing is
     * not (which has to be figured out on a per directory basis). */
    uint32_t    fPlain          : 1;
    /** !fNormal: Matches zero or more subdirectories. */
    uint32_t    fStarStar       : 1;
    /** !fNormal: The whole component is a variable expansion. */
    uint32_t    fExpVariable    : 1;

    /** Filter: Set if the component only matches directories. */
    uint32_t    fDir            : 1;
    /** Set if this is the final component. */
    uint32_t    fFinal          : 1;

    /** Unused bits. */
    uint32_t    fReserved       : 2+8;
} RTPATHGLOBPPE;
193
194
195typedef struct RTPATHGLOB
196{
197 /** Path buffer. */
198 char szPath[RTPATH_MAX];
199 /** Temporary buffers. */
200 union
201 {
202 /** File system object info structure. */
203 RTFSOBJINFO ObjInfo;
204 /** Directory entry buffer. */
205 RTDIRENTRY DirEntry;
206 /** Padding the buffer to an unreasonably large size. */
207 uint8_t abPadding[RTPATH_MAX + sizeof(RTDIRENTRY)];
208 } u;
209
210
211 /** Where to insert the next one.*/
212 PRTPATHGLOBENTRY *ppNext;
213 /** The head pointer. */
214 PRTPATHGLOBENTRY pHead;
215 /** Result count. */
216 uint32_t cResults;
217 /** Counts path overflows. */
218 uint32_t cPathOverflows;
219 /** The input flags. */
220 uint32_t fFlags;
221 /** Matching instruction allocator. */
222 RTPATHMATCHALLOC MatchInstrAlloc;
223 /** Matching state. */
224 RTPATHMATCHCACHE MatchCache;
225
226 /** The pattern string. */
227 const char *pszPattern;
228 /** The parsed path. */
229 PRTPATHPARSED pParsed;
230 /** The component to start with. */
231 uint16_t iFirstComp;
232 /** The corresponding path offset (previous components already present). */
233 uint16_t offFirstPath;
234 /** Path component information we need. */
235 RTPATHGLOBPPE aComps[1];
236} RTPATHGLOB;
237typedef RTPATHGLOB *PRTPATHGLOB;
238
239
240/**
241 * Matching variable lookup table.
242 * Currently so small we don't bother sorting it and doing binary lookups.
243 */
244typedef struct RTPATHMATCHVAR
245{
246 /** The variable name. */
247 const char *pszName;
248 /** The variable name length. */
249 uint16_t cchName;
250 /** Only available as the verify first component. */
251 bool fFirstOnly;
252
253 /**
254 * Queries a given variable value.
255 *
256 * @returns IPRT status code.
257 * @retval VERR_BUFFER_OVERFLOW
258 * @retval VERR_TRY_AGAIN if the caller should skip this value item and try the
259 * next one instead (e.g. env var not present).
260 * @retval VINF_EOF when retrieving the last one, if possible.
261 * @retval VERR_EOF when @a iItem is past the item space.
262 *
263 * @param iItem The variable value item to retrieve. (A variable may
264 * have more than one value, e.g. 'BothProgramFile' on a
265 * 64-bit system or 'Path'.)
266 * @param pszBuf Where to return the value.
267 * @param cbBuf The buffer size.
268 * @param pcchValue Where to return the length of the return string.
269 * @param pCache Pointer to the path matching cache. May speed up
270 * enumerating PATH items and similar.
271 */
272 DECLCALLBACKMEMBER(int, pfnQuery)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, PRTPATHMATCHCACHE pCache);
273
274 /**
275 * Matching method, optional.
276 *
277 * @returns IPRT status code.
278 * @retval VINF_SUCCESS on match.
279 * @retval VERR_MISMATCH on mismatch.
280 *
281 * @param pszMatch String to match with (not terminated).
282 * @param cchMatch The length of what we match with.
283 * @param fIgnoreCase Whether to ignore case or not when comparing.
284 * @param pcchMatched Where to return the length of the match (value length).
285 */
286 DECLCALLBACKMEMBER(int, pfnMatch)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, size_t *pcchMatched);
287
288} RTPATHMATCHVAR;
289
290
291/*********************************************************************************************************************************
292* Internal Functions *
293*********************************************************************************************************************************/
294static int rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp, size_t offStarStarPath);
295static int rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
296static int rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
297static int rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp);
298
299
/**
 * Implements the two variable access functions for a simple one value variable.
 *
 * The generated query function returns the value for item 0 (VINF_EOF as it is
 * the only item) and VERR_EOF for all other items.  The generated match
 * function succeeds when the input starts with the variable value, which
 * requires the value to be no longer than the input; comparing a longer value
 * would read past the end of the (unterminated) input.
 *
 * @param   a_Name          The variable name, used to form the names
 *                          rtPathVarQuery_<a_Name> and rtPathVarMatch_<a_Name>.
 * @param   a_GetStrExpr    Expression yielding the zero terminated value.
 */
#define RTPATHMATCHVAR_SIMPLE(a_Name, a_GetStrExpr) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        NOREF(pCache); \
        if (iItem == 0) \
        { \
            const char *pszValue = a_GetStrExpr; \
            size_t      cchValue = strlen(pszValue); \
            if (cchValue + 1 <= cbBuf) \
            { \
                memcpy(pszBuf, pszValue, cchValue + 1); \
                *pcchValue = cchValue; \
                return VINF_EOF; \
            } \
            return VERR_BUFFER_OVERFLOW; \
        } \
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        const char *pszValue = a_GetStrExpr; \
        size_t      cchValue = strlen(pszValue); \
        if (   cchValue <= cchMatch /* never compare beyond the input */ \
            && (  !fIgnoreCase \
                ? memcmp(pszValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(pszValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
338
/**
 * Implements mapping a glob variable to an environment variable.
 *
 * The generated query function returns the environment variable value for
 * item 0 (VINF_EOF as it is the only item), VERR_EOF when the environment
 * variable is not found or for any other item, and passes other RTEnvGetEx
 * failures on to the caller.  The generated match function succeeds when the
 * input starts with the value, which requires the value to be no longer than
 * the input; comparing a longer value would read past the end of the
 * (unterminated) input.
 *
 * @param   a_Name          The variable name, used to form the names
 *                          rtPathVarQuery_<a_Name> and rtPathVarMatch_<a_Name>.
 * @param   a_pszEnvVar     The environment variable name.
 * @param   a_cbMaxValue    The max expected value size (size of the temporary
 *                          buffer in the match function).
 */
#define RTPATHMATCHVAR_SIMPLE_ENVVAR(a_Name, a_pszEnvVar, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        NOREF(pCache); \
        if (iItem == 0) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return VINF_EOF; \
            if (rc != VERR_ENV_VAR_NOT_FOUND) \
                return rc; \
        } \
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        char   szValue[a_cbMaxValue]; \
        size_t cchValue; \
        int rc = RTEnvGetEx(RTENV_DEFAULT, a_pszEnvVar, szValue, sizeof(szValue), &cchValue); \
        if (   RT_SUCCESS(rc) \
            && cchValue <= cchMatch /* never compare beyond the input */ \
            && (  !fIgnoreCase \
                ? memcmp(szValue, pchMatch, cchValue) == 0 \
                : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
        { \
            *pcchMatched = cchValue; \
            return VINF_SUCCESS; \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
375
/**
 * Implements mapping a glob variable to multiple environment variable values.
 *
 * The generated query function maps item N to the N'th environment variable,
 * returning VINF_EOF for the last one, VERR_TRY_AGAIN when the environment
 * variable is not found and VERR_EOF when the item is past the table.  The
 * generated match function tries the values in table order and succeeds on
 * the first one the input starts with; a value longer than the input is never
 * compared as that would read past the end of the (unterminated) input.
 *
 * @param   a_Name          The variable name.
 * @param   a_apszVarNames  Assumes to be a global variable that RT_ELEMENTS
 *                          works correctly on.
 * @param   a_cbMaxValue    The max expected value size.
 */
#define RTPATHMATCHVAR_MULTIPLE_ENVVARS(a_Name, a_apszVarNames, a_cbMaxValue) \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarQuery_,a_Name)(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue, \
                                                               PRTPATHMATCHCACHE pCache) \
    { \
        NOREF(pCache); \
        if (iItem < RT_ELEMENTS(a_apszVarNames)) \
        { \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], pszBuf, cbBuf, pcchValue); \
            if (RT_SUCCESS(rc)) \
                return iItem + 1 == RT_ELEMENTS(a_apszVarNames) ? VINF_EOF : VINF_SUCCESS; \
            if (rc == VERR_ENV_VAR_NOT_FOUND) \
                rc = VERR_TRY_AGAIN; \
            return rc; \
        } \
        return VERR_EOF; \
    } \
    static DECLCALLBACK(int) RT_CONCAT(rtPathVarMatch_,a_Name)(const char *pchMatch, size_t cchMatch, bool fIgnoreCase, \
                                                               size_t *pcchMatched) \
    { \
        for (uint32_t iItem = 0; iItem < RT_ELEMENTS(a_apszVarNames); iItem++) \
        { \
            char   szValue[a_cbMaxValue]; \
            size_t cchValue; \
            int rc = RTEnvGetEx(RTENV_DEFAULT, a_apszVarNames[iItem], szValue, sizeof(szValue), &cchValue); \
            if (   RT_SUCCESS(rc) \
                && cchValue <= cchMatch /* never compare beyond the input */ \
                && (  !fIgnoreCase \
                    ? memcmp(szValue, pchMatch, cchValue) == 0 \
                    : RTStrNICmp(szValue, pchMatch, cchValue) == 0) ) \
            { \
                *pcchMatched = cchValue; \
                return VINF_SUCCESS; \
            } \
        } \
        return VERR_MISMATCH; \
    } \
    typedef int RT_CONCAT(DummyColonType_,a_Name)
421
422
423RTPATHMATCHVAR_SIMPLE(Arch, RTBldCfgTargetArch());
424RTPATHMATCHVAR_SIMPLE(Bits, RT_XSTR(ARCH_BITS));
425#ifdef RT_OS_WINDOWS
426RTPATHMATCHVAR_SIMPLE_ENVVAR(WinAppData, "AppData", RTPATH_MAX);
427RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramData, "ProgramData", RTPATH_MAX);
428RTPATHMATCHVAR_SIMPLE_ENVVAR(WinProgramFiles, "ProgramFiles", RTPATH_MAX);
429RTPATHMATCHVAR_SIMPLE_ENVVAR(WinCommonProgramFiles, "CommonProgramFiles", RTPATH_MAX);
430# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
431RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherProgramFiles, "ProgramFiles(x86)", RTPATH_MAX);
432RTPATHMATCHVAR_SIMPLE_ENVVAR(WinOtherCommonProgramFiles, "CommonProgramFiles(x86)", RTPATH_MAX);
433# else
434# error "Port ME!"
435# endif
436static const char * const a_apszWinProgramFilesVars[] =
437{
438 "ProgramFiles",
439# ifdef RT_ARCH_AMD64
440 "ProgramFiles(x86)",
441# endif
442};
443RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllProgramFiles, a_apszWinProgramFilesVars, RTPATH_MAX);
444static const char * const a_apszWinCommonProgramFilesVars[] =
445{
446 "CommonProgramFiles",
447# ifdef RT_ARCH_AMD64
448 "CommonProgramFiles(x86)",
449# endif
450};
451RTPATHMATCHVAR_MULTIPLE_ENVVARS(WinAllCommonProgramFiles, a_apszWinCommonProgramFilesVars, RTPATH_MAX);
452#endif
453
454
455/**
456 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery, Enumerates the PATH}
457 */
458static DECLCALLBACK(int) rtPathVarQuery_Path(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
459 PRTPATHMATCHCACHE pCache)
460{
461 RT_NOREF_PV(pCache);
462
463 /*
464 * Query the PATH value.
465 */
466/** @todo cache this in pCache with iItem and offset. */
467 char *pszPathFree = NULL;
468 char *pszPath = pszBuf;
469 size_t cchActual;
470 const char *pszVarNm = "PATH";
471 int rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPath, cbBuf, &cchActual);
472#ifdef RT_OS_WINDOWS
473 if (rc == VERR_ENV_VAR_NOT_FOUND)
474 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm = "Path", pszPath, cbBuf, &cchActual);
475#endif
476 if (rc == VERR_BUFFER_OVERFLOW)
477 {
478 for (uint32_t iTry = 0; iTry < 10; iTry++)
479 {
480 size_t cbPathBuf = RT_ALIGN_Z(cchActual + 1 + 64 * iTry, 64);
481 pszPathFree = (char *)RTMemTmpAlloc(cbPathBuf);
482 rc = RTEnvGetEx(RTENV_DEFAULT, pszVarNm, pszPathFree, cbPathBuf, &cchActual);
483 if (RT_SUCCESS(rc))
484 break;
485 RTMemTmpFree(pszPathFree);
486 AssertReturn(cchActual >= cbPathBuf, VERR_INTERNAL_ERROR_3);
487 }
488 pszPath = pszPathFree;
489 }
490
491 /*
492 * Spool forward to the given PATH item.
493 */
494 rc = VERR_EOF;
495#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
496 const char chSep = ';';
497#else
498 const char chSep = ':';
499#endif
500 while (*pszPath != '\0')
501 {
502 char *pchSep = strchr(pszPath, chSep);
503
504 /* We ignore empty strings, which is probably not entirely correct,
505 but works better on DOS based system with many entries added
506 without checking whether there is a trailing separator or not.
507 Thus, the current directory is only searched if a '.' is present
508 in the PATH. */
509 if (pchSep == pszPath)
510 pszPath++;
511 else if (iItem > 0)
512 {
513 /* If we didn't find a separator, the item doesn't exists. Quit. */
514 if (!pchSep)
515 break;
516
517 pszPath = pchSep + 1;
518 iItem--;
519 }
520 else
521 {
522 /* We've reached the item we wanted. */
523 size_t cchComp = pchSep ? pchSep - pszPath : strlen(pszPath);
524 if (cchComp < cbBuf)
525 {
526 if (pszBuf != pszPath)
527 memmove(pszBuf, pszPath, cchComp);
528 pszBuf[cchComp] = '\0';
529 rc = pchSep ? VINF_SUCCESS : VINF_EOF;
530 }
531 else
532 rc = VERR_BUFFER_OVERFLOW;
533 *pcchValue = cchComp;
534 break;
535 }
536 }
537
538 if (pszPathFree)
539 RTMemTmpFree(pszPathFree);
540 return rc;
541}
542
543
#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system drive letter + colon.}.
 *
 * Only item 0 exists.  On success the two character string (drive letter and
 * colon) is stored in @a pszBuf and VINF_EOF is returned.
 */
static DECLCALLBACK(int) rtPathVarQuery_DosSystemDrive(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                       PRTPATHMATCHCACHE pCache)
{
    RT_NOREF_PV(pCache);

    if (iItem == 0)
    {
        /* Need room for the drive letter, the colon and the terminator. */
        AssertReturn(cbBuf >= 3, VERR_BUFFER_OVERFLOW);

# ifdef RT_OS_WINDOWS
        /* Use a properly sized and aligned local buffer for the UTF-16 path
           rather than the caller's byte buffer.  A return value of MAX_PATH
           or more means the buffer was too small and holds no usable path. */
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszTmp[MAX_PATH];
        UINT    cwc = g_pfnGetSystemWindowsDirectoryW(wszTmp, MAX_PATH);
        if (cwc >= 2 && cwc < MAX_PATH)
        {
            RTUTF16 wcDrive = wszTmp[0];
            if (   RT_C_IS_ALPHA(wcDrive)
                && wszTmp[1] == ':')
            {
                pszBuf[0] = (char)wcDrive;
                pszBuf[1] = ':';
                pszBuf[2] = '\0';
                *pcchValue = 2;
                return VINF_EOF;
            }
        }
# else
        ULONG  ulDrive = ~(ULONG)0;
        APIRET rc = DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, &ulDrive, sizeof(ulDrive));
        ulDrive--; /* 1 = 'A' */
        /* ulDrive is now a zero based index, so it must stay within 'A'..'Z'
           (an initial zero wraps around and is rejected too). */
        if (   rc == NO_ERROR
            && ulDrive <= (ULONG)('Z' - 'A'))
        {
            pszBuf[0] = (char)ulDrive + 'A';
            pszBuf[1] = ':';
            pszBuf[2] = '\0';
            *pcchValue = 2;
            return VINF_EOF;
        }
# endif
        return VERR_INTERNAL_ERROR_4;
    }
    return VERR_EOF;
}
#endif
594
595
#ifdef RT_OS_WINDOWS
/**
 * @interface_method_impl{RTPATHMATCHVAR,pfnQuery,
 *      The system root directory (C:\Windows).}.
 *
 * Only item 0 exists.  The directory is queried as UTF-16 and converted to
 * UTF-8 into @a pszBuf; the conversion status is returned.
 */
static DECLCALLBACK(int) rtPathVarQuery_WinSystemRoot(uint32_t iItem, char *pszBuf, size_t cbBuf, size_t *pcchValue,
                                                      PRTPATHMATCHCACHE pCache)
{
    RT_NOREF_PV(pCache);

    if (iItem == 0)
    {
        Assert(pszBuf); Assert(cbBuf);
        AssertReturn(g_pfnGetSystemWindowsDirectoryW, VERR_SYMBOL_NOT_FOUND);
        RTUTF16 wszSystemRoot[MAX_PATH];
        UINT    cwcSystemRoot = g_pfnGetSystemWindowsDirectoryW(wszSystemRoot, MAX_PATH);

        /* A return value of MAX_PATH or more is the required buffer size, in
           which case wszSystemRoot does not hold the path and must not be
           converted. */
        if (cwcSystemRoot >= MAX_PATH)
            return VERR_BUFFER_OVERFLOW;
        if (cwcSystemRoot > 0)
            return RTUtf16ToUtf8Ex(wszSystemRoot, cwcSystemRoot, &pszBuf, cbBuf, pcchValue);
        return RTErrConvertFromWin32(GetLastError());
    }
    return VERR_EOF;
}
#endif
617
/* The callback generator macros are not needed beyond this point. */
#undef RTPATHMATCHVAR_SIMPLE
#undef RTPATHMATCHVAR_SIMPLE_ENVVAR
#undef RTPATHMATCHVAR_MULTIPLE_ENVVARS
621
622/**
623 * Variables.
624 */
625static RTPATHMATCHVAR const g_aVariables[] =
626{
627 { RT_STR_TUPLE("Arch"), false, rtPathVarQuery_Arch, rtPathVarMatch_Arch },
628 { RT_STR_TUPLE("Bits"), false, rtPathVarQuery_Bits, rtPathVarMatch_Bits },
629 { RT_STR_TUPLE("Path"), true, rtPathVarQuery_Path, NULL },
630#if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
631 { RT_STR_TUPLE("SystemDrive"), true, rtPathVarQuery_DosSystemDrive, NULL },
632#endif
633#ifdef RT_OS_WINDOWS
634 { RT_STR_TUPLE("SystemRoot"), true, rtPathVarQuery_WinSystemRoot, NULL },
635 { RT_STR_TUPLE("AppData"), true, rtPathVarQuery_WinAppData, rtPathVarMatch_WinAppData },
636 { RT_STR_TUPLE("ProgramData"), true, rtPathVarQuery_WinProgramData, rtPathVarMatch_WinProgramData },
637 { RT_STR_TUPLE("ProgramFiles"), true, rtPathVarQuery_WinProgramFiles, rtPathVarMatch_WinProgramFiles },
638 { RT_STR_TUPLE("OtherProgramFiles"), true, rtPathVarQuery_WinOtherProgramFiles, rtPathVarMatch_WinOtherProgramFiles },
639 { RT_STR_TUPLE("AllProgramFiles"), true, rtPathVarQuery_WinAllProgramFiles, rtPathVarMatch_WinAllProgramFiles },
640 { RT_STR_TUPLE("CommonProgramFiles"), true, rtPathVarQuery_WinCommonProgramFiles, rtPathVarMatch_WinCommonProgramFiles },
641 { RT_STR_TUPLE("OtherCommonProgramFiles"), true, rtPathVarQuery_WinOtherCommonProgramFiles, rtPathVarMatch_WinOtherCommonProgramFiles },
642 { RT_STR_TUPLE("AllCommonProgramFiles"), true, rtPathVarQuery_WinAllCommonProgramFiles, rtPathVarMatch_WinAllCommonProgramFiles },
643#endif
644};
645
646
647
648/**
649 * Handles a complicated set.
650 *
651 * A complicated set is either using ranges, character classes or code points
652 * outside the ASCII-7 range.
653 *
654 * @returns VINF_SUCCESS or VERR_MISMATCH. May also return UTF-8 decoding
655 * errors as well as VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED.
656 *
657 * @param ucInput The input code point to match with.
658 * @param pchSet The start of the set specification (after caret).
659 * @param cchSet The length of the set specification.
660 */
661static int rtPathMatchExecExtendedSet(RTUNICP ucInput, const char *pchSet, size_t cchSet)
662{
663 while (cchSet > 0)
664 {
665 RTUNICP ucSet;
666 int rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet);
667 AssertRCReturn(rc, rc);
668
669 /*
670 * Check for character class, collating symbol and equvalence class.
671 */
672 if (ucSet == '[' && cchSet > 0)
673 {
674 char chNext = *pchSet;
675 if (chNext == ':')
676 {
677#define CHECK_CHAR_CLASS(a_szClassNm, a_BoolTestExpr) \
678 if ( cchSet >= sizeof(a_szClassNm) \
679 && memcmp(pchSet, a_szClassNm "]", sizeof(a_szClassNm)) == 0) \
680 { \
681 if (a_BoolTestExpr) \
682 return VINF_SUCCESS; \
683 pchSet += sizeof(a_szClassNm); \
684 cchSet -= sizeof(a_szClassNm); \
685 continue; \
686 } do { } while (0)
687
688 CHECK_CHAR_CLASS(":alpha:", RTUniCpIsAlphabetic(ucInput));
689 CHECK_CHAR_CLASS(":alnum:", RTUniCpIsAlphabetic(ucInput) || RTUniCpIsDecDigit(ucInput)); /** @todo figure what's correct here and fix uni.h */
690 CHECK_CHAR_CLASS(":blank:", ucInput == ' ' || ucInput == '\t');
691 CHECK_CHAR_CLASS(":cntrl:", ucInput < 31 || ucInput == 127);
692 CHECK_CHAR_CLASS(":digit:", RTUniCpIsDecDigit(ucInput));
693 CHECK_CHAR_CLASS(":lower:", RTUniCpIsLower(ucInput));
694 CHECK_CHAR_CLASS(":print:", RTUniCpIsAlphabetic(ucInput) || (RT_C_IS_PRINT(ucInput) && ucInput < 127)); /** @todo fixme*/
695 CHECK_CHAR_CLASS(":punct:", RT_C_IS_PRINT(ucInput) && ucInput < 127); /** @todo fixme*/
696 CHECK_CHAR_CLASS(":space:", RTUniCpIsSpace(ucInput));
697 CHECK_CHAR_CLASS(":upper:", RTUniCpIsUpper(ucInput));
698 CHECK_CHAR_CLASS(":xdigit:", RTUniCpIsHexDigit(ucInput));
699 AssertMsgFailedReturn(("Unknown or malformed char class: '%.*s'\n", cchSet + 1, pchSet - 1),
700 VERR_PATH_GLOB_UNKNOWN_CHAR_CLASS);
701#undef CHECK_CHAR_CLASS
702 }
703 /** @todo implement collating symbol and equvalence class. */
704 else if (chNext == '=' || chNext == '.')
705 AssertFailedReturn(VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
706 }
707
708 /*
709 * Check for range (leading or final dash does not constitute a range).
710 */
711 if (cchSet > 1 && *pchSet == '-')
712 {
713 pchSet++; /* skip dash */
714 cchSet--;
715
716 RTUNICP ucSet2;
717 rc = RTStrGetCpNEx(&pchSet, &cchSet, &ucSet2);
718 AssertRCReturn(rc, rc);
719 Assert(ucSet < ucSet2);
720 if (ucInput >= ucSet && ucInput <= ucSet2)
721 return VINF_SUCCESS;
722 }
723 /*
724 * Single char comparison.
725 */
726 else if (ucInput == ucSet)
727 return VINF_SUCCESS;
728 }
729 return VERR_MISMATCH;
730}
731
732
733/**
734 * Variable matching fallback using the query function.
735 *
736 * This must not be inlined as it consuming a lot of stack! Which is why it's
737 * placed a couple of functions away from the recursive rtPathExecMatch.
738 *
739 * @returns VINF_SUCCESS or VERR_MISMATCH.
740 * @param pchInput The current input position.
741 * @param cchInput The amount of input left..
742 * @param idxVar The variable table index.
743 * @param fIgnoreCase Whether to ignore case when comparing.
744 * @param pcchMatched Where to return how much we actually matched up.
745 * @param pCache Pointer to the path matching cache.
746 */
747DECL_NO_INLINE(static, int) rtPathMatchExecVariableFallback(const char *pchInput, size_t cchInput, uint16_t idxVar,
748 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
749{
750 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
751 {
752 char szValue[RTPATH_MAX];
753 size_t cchValue;
754 int rc = g_aVariables[idxVar].pfnQuery(iItem, szValue, sizeof(szValue), &cchValue, pCache);
755 if (RT_SUCCESS(rc))
756 {
757 if (cchValue <= cchInput)
758 {
759 if ( !fIgnoreCase
760 ? memcmp(pchInput, szValue, cchValue) == 0
761 : RTStrNICmp(pchInput, szValue, cchValue) == 0)
762 {
763 *pcchMatched = cchValue;
764 return VINF_SUCCESS;
765 }
766 }
767 if (rc == VINF_EOF)
768 return VERR_MISMATCH;
769 }
770 else if (rc == VERR_EOF)
771 return VERR_MISMATCH;
772 else
773 Assert(rc == VERR_BUFFER_OVERFLOW || rc == VERR_TRY_AGAIN);
774 }
775 AssertFailed();
776 return VERR_MISMATCH;
777}
778
779
780/**
781 * Variable matching worker.
782 *
783 * @returns VINF_SUCCESS or VERR_MISMATCH.
784 * @param pchInput The current input position.
785 * @param cchInput The amount of input left..
786 * @param idxVar The variable table index.
787 * @param fIgnoreCase Whether to ignore case when comparing.
788 * @param pcchMatched Where to return how much we actually matched up.
789 * @param pCache Pointer to the path matching cache.
790 */
791static int rtPathMatchExecVariable(const char *pchInput, size_t cchInput, uint16_t idxVar,
792 bool fIgnoreCase, size_t *pcchMatched, PRTPATHMATCHCACHE pCache)
793{
794 Assert(idxVar < RT_ELEMENTS(g_aVariables));
795 if (g_aVariables[idxVar].pfnMatch)
796 return g_aVariables[idxVar].pfnMatch(pchInput, cchInput, fIgnoreCase, pcchMatched);
797 return rtPathMatchExecVariableFallback(pchInput, cchInput, idxVar, fIgnoreCase, pcchMatched, pCache);
798}
799
800
801/**
802 * Executes a matching program against the input.
803 *
804 * @returns VINF_SUCCESS or VERR_MISMATCH.
805 * @param pchInput The current input position.
806 * @param cchInput The amount of input left..
807 * @param pProg The first matching program instruction.
808 * @param pCache Pointer to the path matching cache.
809 */
810static int rtPathMatchExec(const char *pchInput, size_t cchInput, PCRTPATHMATCHCORE pProg, PRTPATHMATCHCACHE pCache)
811{
812 for (;;)
813 {
814 switch (pProg->enmOpCode)
815 {
816 case RTPATHMATCHOP_RETURN_MATCH_IF_AT_END:
817 return cchInput == 0 ? VINF_SUCCESS : VERR_MISMATCH;
818
819 case RTPATHMATCHOP_RETURN_MATCH:
820 return VINF_SUCCESS;
821
822 case RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT:
823 if ( cchInput > 2
824 || cchInput < 1
825 || pchInput[0] != '.'
826 || (cchInput == 2 && pchInput[1] != '.') )
827 return VINF_SUCCESS;
828 return VERR_MISMATCH;
829
830 case RTPATHMATCHOP_STRCMP:
831 if (pProg->cch > cchInput)
832 return VERR_MISMATCH;
833 if (memcmp(pchInput, pProg->pch, pProg->cch) != 0)
834 return VERR_MISMATCH;
835 cchInput -= pProg->cch;
836 pchInput += pProg->cch;
837 break;
838
839 case RTPATHMATCHOP_STRICMP:
840 if (pProg->cch > cchInput)
841 return VERR_MISMATCH;
842 if (RTStrNICmp(pchInput, pProg->pch, pProg->cch) != 0)
843 return VERR_MISMATCH;
844 cchInput -= pProg->cch;
845 pchInput += pProg->cch;
846 break;
847
848 case RTPATHMATCHOP_SKIP_ONE_CODEPOINT:
849 {
850 if (cchInput == 0)
851 return VERR_MISMATCH;
852 RTUNICP ucInputIgnore;
853 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
854 AssertRCReturn(rc, rc);
855 break;
856 }
857
858 case RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS:
859 {
860 uint16_t cCpsLeft = pProg->cch;
861 Assert(cCpsLeft > 1);
862 if (cCpsLeft > cchInput)
863 return VERR_MISMATCH;
864 while (cCpsLeft-- > 0)
865 {
866 RTUNICP ucInputIgnore;
867 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInputIgnore);
868 if (RT_FAILURE(rc))
869 return rc == VERR_END_OF_STRING ? VERR_MISMATCH : rc;
870 }
871 break;
872 }
873
874 case RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7:
875 {
876 if (cchInput == 0)
877 return VERR_MISMATCH;
878 RTUNICP ucInput;
879 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
880 AssertRCReturn(rc, rc);
881 if (ucInput >= 0x80)
882 return VERR_MISMATCH;
883 if (memchr(pProg->pch, (char)ucInput, pProg->cch) == NULL)
884 return VERR_MISMATCH;
885 break;
886 }
887
888 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7:
889 {
890 if (cchInput == 0)
891 return VERR_MISMATCH;
892 RTUNICP ucInput;
893 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
894 AssertRCReturn(rc, rc);
895 if (ucInput >= 0x80)
896 break;
897 if (memchr(pProg->pch, (char)ucInput, pProg->cch) != NULL)
898 return VERR_MISMATCH;
899 break;
900 }
901
902 case RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED:
903 {
904 if (cchInput == 0)
905 return VERR_MISMATCH;
906 RTUNICP ucInput;
907 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
908 AssertRCReturn(rc, rc);
909 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
910 if (rc == VINF_SUCCESS)
911 break;
912 return rc;
913 }
914
915 case RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED:
916 {
917 if (cchInput == 0)
918 return VERR_MISMATCH;
919 RTUNICP ucInput;
920 int rc = RTStrGetCpNEx(&pchInput, &cchInput, &ucInput);
921 AssertRCReturn(rc, rc);
922 rc = rtPathMatchExecExtendedSet(ucInput, pProg->pch, pProg->cch);
923 if (rc == VERR_MISMATCH)
924 break;
925 if (rc == VINF_SUCCESS)
926 rc = VERR_MISMATCH;
927 return rc;
928 }
929
930 case RTPATHMATCHOP_VARIABLE_VALUE_CMP:
931 case RTPATHMATCHOP_VARIABLE_VALUE_ICMP:
932 {
933 size_t cchMatched = 0;
934 int rc = rtPathMatchExecVariable(pchInput, cchInput, pProg->uOp2,
935 pProg->enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP, &cchMatched, pCache);
936 if (rc == VINF_SUCCESS)
937 {
938 pchInput += cchMatched;
939 cchInput -= cchMatched;
940 break;
941 }
942 return rc;
943 }
944
945 /*
946 * This is the expensive one. It always completes the program.
947 */
948 case RTPATHMATCHOP_ZERO_OR_MORE:
949 {
950 if (cchInput < pProg->cch)
951 return VERR_MISMATCH;
952 size_t cchMatched = cchInput - pProg->cch;
953 do
954 {
955 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
956 if (RT_SUCCESS(rc))
957 return rc;
958 } while (cchMatched-- > 0);
959 return VERR_MISMATCH;
960 }
961
962 /*
963 * Variant of the above that doesn't match '.' and '..' entries.
964 */
965 case RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT:
966 {
967 if (cchInput < pProg->cch)
968 return VERR_MISMATCH;
969 if ( cchInput <= 2
970 && cchInput > 0
971 && pchInput[0] == '.'
972 && (cchInput == 1 || pchInput[1] == '.') )
973 return VERR_MISMATCH;
974 size_t cchMatched = cchInput - pProg->cch;
975 do
976 {
977 int rc = rtPathMatchExec(&pchInput[cchMatched], cchInput - cchMatched, pProg + 1, pCache);
978 if (RT_SUCCESS(rc))
979 return rc;
980 } while (cchMatched-- > 0);
981 return VERR_MISMATCH;
982 }
983
984 default:
985 AssertMsgFailedReturn(("enmOpCode=%d\n", pProg->enmOpCode), VERR_INTERNAL_ERROR_3);
986 }
987
988 pProg++;
989 }
990}
991
992
993
994
995/**
996 * Compiles a path matching program.
997 *
998 * @returns IPRT status code.
999 * @param pchPattern The pattern to compile.
1000 * @param cchPattern The length of the pattern.
1001 * @param fIgnoreCase Whether to ignore case or not when doing the
1002 * actual matching later on.
1003 * @param pAllocator Pointer to the instruction allocator & result
1004 * array. The compiled "program" starts at
1005 * PRTPATHMATCHALLOC::paInstructions[PRTPATHMATCHALLOC::iNext]
1006 * (input iNext value).
1007 *
1008 * @todo Expose this matching code and also use it for RTDirOpenFiltered
1009 */
1010static int rtPathMatchCompile(const char *pchPattern, size_t cchPattern, bool fIgnoreCase, PRTPATHMATCHALLOC pAllocator)
1011{
1012 /** @todo PORTME: big endian. */
1013 static const uint8_t s_bmMetaChars[256/8] =
1014 {
1015 0x00, 0x00, 0x00, 0x00, /* 0 thru 31 */
1016 0x10, 0x04, 0x00, 0x80, /* 32 thru 63 */
1017 0x00, 0x00, 0x00, 0x08, /* 64 thru 95 */
1018 0x00, 0x00, 0x00, 0x00, /* 96 thru 127 */
1019 /* UTF-8 multibyte: */
1020 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1021 };
1022 Assert(ASMBitTest(s_bmMetaChars, '$')); AssertCompile('$' == 0x24 /*36*/);
1023 Assert(ASMBitTest(s_bmMetaChars, '*')); AssertCompile('*' == 0x2a /*42*/);
1024 Assert(ASMBitTest(s_bmMetaChars, '?')); AssertCompile('?' == 0x3f /*63*/);
1025 Assert(ASMBitTest(s_bmMetaChars, '[')); AssertCompile('[' == 0x5b /*91*/);
1026
1027 /*
1028 * For checking for the first instruction.
1029 */
1030 uint16_t const iFirst = pAllocator->iNext;
1031
1032 /*
1033 * This is for tracking zero-or-more instructions and for calculating
1034 * the minimum amount of input required for it to be considered.
1035 */
1036 uint16_t aiZeroOrMore[RTPATHMATCH_MAX_ZERO_OR_MORE];
1037 uint8_t cZeroOrMore = 0;
1038 size_t offInput = 0;
1039
1040 /*
1041 * Loop thru the pattern and translate it into string matching instructions.
1042 */
1043 for (;;)
1044 {
1045 /*
1046 * Allocate the next instruction.
1047 */
1048 if (pAllocator->iNext >= pAllocator->cAllocated)
1049 {
1050 uint32_t cNew = pAllocator->cAllocated ? pAllocator->cAllocated * 2 : 2;
1051 void *pvNew = RTMemRealloc(pAllocator->paInstructions, cNew * sizeof(pAllocator->paInstructions[0]));
1052 AssertReturn(pvNew, VERR_NO_MEMORY);
1053 pAllocator->paInstructions = (PRTPATHMATCHCORE)pvNew;
1054 pAllocator->cAllocated = cNew;
1055 }
1056 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[pAllocator->iNext++];
1057 pInstr->pch = pchPattern;
1058 pInstr->cch = 0;
1059 pInstr->uOp2 = 0;
1060
1061 /*
1062 * Special case: End of pattern.
1063 */
1064 if (!cchPattern)
1065 {
1066 pInstr->enmOpCode = RTPATHMATCHOP_RETURN_MATCH_IF_AT_END;
1067 break;
1068 }
1069
1070 /*
1071 * Parse the next bit of the pattern.
1072 */
1073 char ch = *pchPattern;
1074 if (ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1075 {
1076 /*
1077 * Zero or more characters wildcard.
1078 */
1079 if (ch == '*')
1080 {
1081 /* Skip extra asterisks. */
1082 do
1083 {
1084 cchPattern--;
1085 pchPattern++;
1086 } while (cchPattern > 0 && *pchPattern == '*');
1087
1088 /* There is a special optimization for trailing '*'. */
1089 pInstr->cch = 1;
1090 if (cchPattern == 0)
1091 {
1092 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1093 ? RTPATHMATCHOP_RETURN_MATCH_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_RETURN_MATCH;
1094 break;
1095 }
1096
1097 pInstr->enmOpCode = iFirst + 1U == pAllocator->iNext
1098 ? RTPATHMATCHOP_ZERO_OR_MORE_EXCEPT_DOT_AND_DOTDOT : RTPATHMATCHOP_ZERO_OR_MORE;
1099 pInstr->uOp2 = (uint16_t)offInput;
1100 AssertReturn(cZeroOrMore < RT_ELEMENTS(aiZeroOrMore), VERR_OUT_OF_RANGE);
1101 aiZeroOrMore[cZeroOrMore] = (uint16_t)(pInstr - pAllocator->paInstructions);
1102
1103 /* cchInput unchanged, zero-or-more matches. */
1104 continue;
1105 }
1106
1107 /*
1108 * Single character wildcard.
1109 */
1110 if (ch == '?')
1111 {
1112 /* Count them if more. */
1113 uint16_t cchQms = 1;
1114 while (cchQms < cchPattern && pchPattern[cchQms] == '?')
1115 cchQms++;
1116
1117 pInstr->cch = cchQms;
1118 pInstr->enmOpCode = cchQms == 1 ? RTPATHMATCHOP_SKIP_ONE_CODEPOINT : RTPATHMATCHOP_SKIP_MULTIPLE_CODEPOINTS;
1119
1120 cchPattern -= cchQms;
1121 pchPattern += cchQms;
1122 offInput += cchQms;
1123 continue;
1124 }
1125
1126 /*
1127 * Character in set.
1128 *
1129 * Note that we skip the first char in the set as that is the only place
1130 * ']' can be placed if one desires to explicitly include it in the set.
1131 * To make life a bit more interesting, [:class:] is allowed inside the
1132 * set, so we have to do the counting game to find the end.
1133 */
1134 if (ch == '[')
1135 {
1136 if ( cchPattern > 2
1137 && (const char *)memchr(pchPattern + 2, ']', cchPattern) != NULL)
1138 {
1139
1140 /* Check for not-in. */
1141 bool fInverted = false;
1142 size_t offStart = 1;
1143 if (pchPattern[offStart] == '^')
1144 {
1145 fInverted = true;
1146 offStart++;
1147 }
1148
1149 /* Special case for ']' as the first char, it doesn't indicate closing then. */
1150 size_t off = offStart;
1151 if (pchPattern[off] == ']')
1152 off++;
1153
1154 bool fExtended = false;
1155 while (off < cchPattern)
1156 {
1157 ch = pchPattern[off++];
1158 if (ch == '[')
1159 {
1160 if (off < cchPattern)
1161 {
1162 char chOpen = pchPattern[off];
1163 if ( chOpen == ':'
1164 || chOpen == '='
1165 || chOpen == '.')
1166 {
1167 off++;
1168 const char *pchFound = (const char *)memchr(&pchPattern[off], ']', cchPattern - off);
1169 if ( pchFound
1170 && pchFound[-1] == chOpen)
1171 {
1172 fExtended = true;
1173 off = pchFound - pchPattern + 1;
1174 }
1175 else
1176 AssertFailed();
1177 }
1178 }
1179 }
1180 /* Check for closing. */
1181 else if (ch == ']')
1182 break;
1183 /* Check for range expression, promote to extended if this happens. */
1184 else if ( ch == '-'
1185 && off != offStart + 1
1186 && off < cchPattern
1187 && pchPattern[off] != ']')
1188 fExtended = true;
1189 /* UTF-8 multibyte chars forces us to use the extended version too. */
1190 else if ((uint8_t)ch >= 0x80)
1191 fExtended = true;
1192 }
1193
1194 if (ch == ']')
1195 {
1196 pInstr->pch = &pchPattern[offStart];
1197 pInstr->cch = (uint16_t)(off - offStart - 1);
1198 if (!fExtended)
1199 pInstr->enmOpCode = !fInverted
1200 ? RTPATHMATCHOP_CODEPOINT_IN_SET_ASCII7 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_ASCII7;
1201 else
1202 pInstr->enmOpCode = !fInverted
1203 ? RTPATHMATCHOP_CODEPOINT_IN_SET_EXTENDED
1204 : RTPATHMATCHOP_CODEPOINT_NOT_IN_SET_EXTENDED;
1205 pchPattern += off;
1206 cchPattern -= off;
1207 offInput += 1;
1208 continue;
1209 }
1210
1211 /* else: invalid, treat it as */
1212 AssertFailed();
1213 }
1214 }
1215 /*
1216 * Variable matching.
1217 */
1218 else if (ch == '$')
1219 {
1220 const char *pchFound;
1221 if ( cchPattern > 3
1222 && pchPattern[1] == '{'
1223 && (pchFound = (const char *)memchr(pchPattern + 2, '}', cchPattern)) != NULL
1224 && pchFound != &pchPattern[2])
1225 {
1226 /* skip to the variable name. */
1227 pchPattern += 2;
1228 cchPattern -= 2;
1229 size_t cchVarNm = pchFound - pchPattern;
1230
1231 /* Look it up. */
1232 uint32_t iVar;
1233 for (iVar = 0; iVar < RT_ELEMENTS(g_aVariables); iVar++)
1234 if ( g_aVariables[iVar].cchName == cchVarNm
1235 && memcmp(g_aVariables[iVar].pszName, pchPattern, cchVarNm) == 0)
1236 break;
1237 if (iVar < RT_ELEMENTS(g_aVariables))
1238 {
1239 pInstr->uOp2 = (uint16_t)iVar;
1240 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_VARIABLE_VALUE_CMP : RTPATHMATCHOP_VARIABLE_VALUE_ICMP;
1241 pInstr->pch = pchPattern; /* not necessary */
1242 pInstr->cch = (uint16_t)cchPattern; /* ditto */
1243 pchPattern += cchVarNm + 1;
1244 cchPattern -= cchVarNm + 1;
1245 AssertMsgReturn(!g_aVariables[iVar].fFirstOnly || iFirst + 1U == pAllocator->iNext,
1246 ("Glob variable '%s' should be first\n", g_aVariables[iVar].pszName),
1247 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1248 /* cchInput unchanged, value can be empty. */
1249 continue;
1250 }
1251 AssertMsgFailedReturn(("Unknown path matching variable '%.*s'\n", cchVarNm, pchPattern),
1252 VERR_PATH_MATCH_UNKNOWN_VARIABLE);
1253 }
1254 }
1255 else
1256 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1257 }
1258
1259 /*
1260 * Plain text. Look for the next meta char.
1261 */
1262 uint32_t cchPlain = 1;
1263 while (cchPlain < cchPattern)
1264 {
1265 ch = pchPattern[cchPlain];
1266 if (!ASMBitTest(s_bmMetaChars, (uint8_t)ch))
1267 { /* probable */ }
1268 else if ( ch == '?'
1269 || ch == '*')
1270 break;
1271 else if (ch == '$')
1272 {
1273 const char *pchFound;
1274 if ( cchPattern > cchPlain + 3
1275 && pchPattern[cchPlain + 1] == '{'
1276 && (pchFound = (const char *)memchr(&pchPattern[cchPlain + 2], '}', cchPattern - cchPlain - 2)) != NULL
1277 && pchFound != &pchPattern[cchPlain + 2])
1278 break;
1279 }
1280 else if (ch == '[')
1281 {
1282 /* We don't put a lot of effort into getting this 100% right here,
1283 no point it complicating things for malformed expressions. */
1284 if ( cchPattern > cchPlain + 2
1285 && memchr(&pchPattern[cchPlain + 2], ']', cchPattern - cchPlain - 1) != NULL)
1286 break;
1287 }
1288 else
1289 AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* broken bitmap / compiler codeset */
1290 cchPlain++;
1291 }
1292 pInstr->enmOpCode = !fIgnoreCase ? RTPATHMATCHOP_STRCMP : RTPATHMATCHOP_STRICMP;
1293 pInstr->cch = cchPlain;
1294 Assert(pInstr->pch == pchPattern);
1295 Assert(pInstr->uOp2 == 0);
1296 pchPattern += cchPlain;
1297 cchPattern -= cchPlain;
1298 offInput += cchPlain;
1299 }
1300
1301 /*
1302 * Optimize zero-or-more matching.
1303 */
1304 while (cZeroOrMore-- > 0)
1305 {
1306 PRTPATHMATCHCORE pInstr = &pAllocator->paInstructions[aiZeroOrMore[cZeroOrMore]];
1307 pInstr->uOp2 = (uint16_t)(offInput - pInstr->uOp2);
1308 }
1309
1310 /** @todo It's possible to use offInput to inject a instruction for checking
1311 * minimum input length at the start of the program. Not sure it's
1312 * worth it though, unless it's long a complicated expression... */
1313 return VINF_SUCCESS;
1314}
1315
1316
1317/**
1318 * Parses the glob pattern.
1319 *
1320 * This compiles filename matching programs for each component and determins the
1321 * optimal search strategy for them.
1322 *
1323 * @returns IPRT status code.
1324 * @param pGlob The glob instance data.
1325 * @param pszPattern The pattern to parse.
1326 * @param pParsed The RTPathParse output for the pattern.
1327 * @param fFlags The glob flags (same as pGlob->fFlags).
1328 */
1329static int rtPathGlobParse(PRTPATHGLOB pGlob, const char *pszPattern, PRTPATHPARSED pParsed, uint32_t fFlags)
1330{
1331 AssertReturn(pParsed->cComps > 0, VERR_INVALID_PARAMETER); /* shouldn't happen */
1332 uint32_t iComp = 0;
1333
1334 /*
1335 * If we've got a rootspec, mark it as plain. On platforms with
1336 * drive letter and/or UNC we don't allow wildcards or such in
1337 * the drive letter spec or UNC server name. (At least not yet.)
1338 */
1339 if (RTPATH_PROP_HAS_ROOT_SPEC(pParsed->fProps))
1340 {
1341 AssertReturn(pParsed->aComps[0].cch < sizeof(pGlob->szPath) - 1, VERR_FILENAME_TOO_LONG);
1342 memcpy(pGlob->szPath, &pszPattern[pParsed->aComps[0].off], pParsed->aComps[0].cch);
1343 pGlob->offFirstPath = pParsed->aComps[0].cch;
1344 pGlob->iFirstComp = iComp = 1;
1345 }
1346 else
1347 {
1348 const char * const pszComp = &pszPattern[pParsed->aComps[0].off];
1349
1350 /*
1351 * The tilde is only applicable to the first component, expand it
1352 * immediately.
1353 */
1354 if ( *pszComp == '~'
1355 && !(fFlags & RTPATHGLOB_F_NO_TILDE))
1356 {
1357 if (pParsed->aComps[0].cch == 1)
1358 {
1359 int rc = RTPathUserHome(pGlob->szPath, sizeof(pGlob->szPath) - 1);
1360 AssertRCReturn(rc, rc);
1361 }
1362 else
1363 AssertMsgFailedReturn(("'%.*s' is not supported yet\n", pszComp, pParsed->aComps[0].cch),
1364 VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED);
1365 pGlob->offFirstPath = (uint32_t)RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1366 pGlob->iFirstComp = iComp = 1;
1367 }
1368 }
1369
1370 /*
1371 * Process the other components.
1372 */
1373 bool fStarStar = false;
1374 for (; iComp < pParsed->cComps; iComp++)
1375 {
1376 const char *pszComp = &pszPattern[pParsed->aComps[iComp].off];
1377 uint16_t cchComp = pParsed->aComps[iComp].cch;
1378 Assert(pGlob->aComps[iComp].fNormal == false);
1379
1380 pGlob->aComps[iComp].fDir = iComp + 1 < pParsed->cComps || (fFlags & RTPATHGLOB_F_ONLY_DIRS);
1381 if ( cchComp != 2
1382 || pszComp[0] != '*'
1383 || pszComp[1] != '*'
1384 || (fFlags & RTPATHGLOB_F_NO_STARSTAR) )
1385 {
1386 /* Compile the pattern. */
1387 uint16_t const iMatchProg = pGlob->MatchInstrAlloc.iNext;
1388 pGlob->aComps[iComp].iMatchProg = iMatchProg;
1389 int rc = rtPathMatchCompile(pszComp, cchComp, RT_BOOL(fFlags & RTPATHGLOB_F_IGNORE_CASE),
1390 &pGlob->MatchInstrAlloc);
1391 if (RT_FAILURE(rc))
1392 return rc;
1393
1394 /* Check for plain text as well as full variable matching (not applicable after '**'). */
1395 uint16_t const cInstructions = pGlob->MatchInstrAlloc.iNext - iMatchProg;
1396 if ( cInstructions == 2
1397 && !fStarStar
1398 && pGlob->MatchInstrAlloc.paInstructions[iMatchProg + 1].enmOpCode == RTPATHMATCHOP_RETURN_MATCH_IF_AT_END)
1399 {
1400 if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRCMP
1401 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_STRICMP)
1402 pGlob->aComps[iComp].fPlain = true;
1403 else if ( pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1404 || pGlob->MatchInstrAlloc.paInstructions[iMatchProg].enmOpCode == RTPATHMATCHOP_VARIABLE_VALUE_ICMP)
1405 {
1406 pGlob->aComps[iComp].fExpVariable = true;
1407 AssertMsgReturn( iComp == 0
1408 || !g_aVariables[pGlob->MatchInstrAlloc.paInstructions[iMatchProg].uOp2].fFirstOnly,
1409 ("Glob variable '%.*s' can only be used as the path component.\n", cchComp, pszComp),
1410 VERR_PATH_MATCH_VARIABLE_MUST_BE_FIRST);
1411 }
1412 else
1413 pGlob->aComps[iComp].fNormal = true;
1414 }
1415 else
1416 pGlob->aComps[iComp].fNormal = true;
1417 }
1418 else
1419 {
1420 /* Recursive "**" matching. */
1421 pGlob->aComps[iComp].fNormal = false;
1422 pGlob->aComps[iComp].fStarStar = true;
1423 AssertReturn(!fStarStar, VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED); /** @todo implement multiple '**' sequences in a pattern. */
1424 fStarStar = true;
1425 }
1426 }
1427 pGlob->aComps[pParsed->cComps - 1].fFinal = true;
1428
1429 return VINF_SUCCESS;
1430}
1431
1432
1433/**
1434 * This is for skipping overly long directories entries.
1435 *
1436 * Since our directory entry buffer can hold filenames of RTPATH_MAX bytes, we
1437 * can safely skip filenames that are longer. There are very few file systems
1438 * that can actually store filenames longer than 255 bytes at time of coding
1439 * (2015-09), and extremely few which can exceed 4096 (RTPATH_MAX) bytes.
1440 *
1441 * @returns IPRT status code.
1442 * @param hDir The directory handle.
1443 * @param cbNeeded The required entry size.
1444 */
1445DECL_NO_INLINE(static, int) rtPathGlobSkipDirEntry(PRTDIR hDir, size_t cbNeeded)
1446{
1447 int rc = VERR_BUFFER_OVERFLOW;
1448 cbNeeded = RT_ALIGN_Z(cbNeeded, 16);
1449 PRTDIRENTRY pDirEntry = (PRTDIRENTRY)RTMemTmpAlloc(cbNeeded);
1450 if (pDirEntry)
1451 {
1452 rc = RTDirRead(hDir, pDirEntry, &cbNeeded);
1453 RTMemTmpFree(pDirEntry);
1454 }
1455 return rc;
1456}
1457
1458
1459/**
1460 * Adds a result.
1461 *
1462 * @returns IPRT status code.
1463 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1464 *
1465 * @param pGlob The glob instance data.
1466 * @param cchPath The number of bytes to add from pGlob->szPath.
1467 * @param uType The RTDIRENTRYTYPE value.
1468 */
1469DECL_NO_INLINE(static, int) rtPathGlobAddResult(PRTPATHGLOB pGlob, size_t cchPath, uint8_t uType)
1470{
1471 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1472 {
1473 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + 1]));
1474 if (pEntry)
1475 {
1476 pEntry->uType = uType;
1477 pEntry->cchPath = (uint16_t)cchPath;
1478 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1479 pEntry->szPath[cchPath] = '\0';
1480
1481 pEntry->pNext = NULL;
1482 *pGlob->ppNext = pEntry;
1483 pGlob->ppNext = &pEntry->pNext;
1484 pGlob->cResults++;
1485
1486 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1487 return VINF_SUCCESS;
1488 return VINF_CALLBACK_RETURN;
1489 }
1490 return VERR_NO_MEMORY;
1491 }
1492 return VERR_TOO_MUCH_DATA;
1493}
1494
1495
1496/**
1497 * Adds a result, constructing the path from two string.
1498 *
1499 * @returns IPRT status code.
1500 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1501 *
1502 * @param pGlob The glob instance data.
1503 * @param cchPath The number of bytes to add from pGlob->szPath.
1504 * @param pchName The string (usual filename) to append to the szPath.
1505 * @param cchName The length of the string to append.
1506 * @param uType The RTDIRENTRYTYPE value.
1507 */
1508DECL_NO_INLINE(static, int) rtPathGlobAddResult2(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1509 uint8_t uType)
1510{
1511 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1512 {
1513 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1514 if (pEntry)
1515 {
1516 pEntry->uType = uType;
1517 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1518 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1519 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1520 pEntry->szPath[cchPath + cchName] = '\0';
1521
1522 pEntry->pNext = NULL;
1523 *pGlob->ppNext = pEntry;
1524 pGlob->ppNext = &pEntry->pNext;
1525 pGlob->cResults++;
1526
1527 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1528 return VINF_SUCCESS;
1529 return VINF_CALLBACK_RETURN;
1530 }
1531 return VERR_NO_MEMORY;
1532 }
1533 return VERR_TOO_MUCH_DATA;
1534}
1535
1536
1537/**
1538 * Prepares a result, constructing the path from two string.
1539 *
1540 * The caller must call either rtPathGlobCommitResult or
1541 * rtPathGlobRollbackResult to complete the operation.
1542 *
1543 * @returns IPRT status code.
1544 * @retval VINF_CALLBACK_RETURN if we can stop searching.
1545 *
1546 * @param pGlob The glob instance data.
1547 * @param cchPath The number of bytes to add from pGlob->szPath.
1548 * @param pchName The string (usual filename) to append to the szPath.
1549 * @param cchName The length of the string to append.
1550 * @param uType The RTDIRENTRYTYPE value.
1551 */
1552DECL_NO_INLINE(static, int) rtPathGlobAlmostAddResult(PRTPATHGLOB pGlob, size_t cchPath, const char *pchName, size_t cchName,
1553 uint8_t uType)
1554{
1555 if (pGlob->cResults < RTPATHGLOB_MAX_RESULTS)
1556 {
1557 PRTPATHGLOBENTRY pEntry = (PRTPATHGLOBENTRY)RTMemAlloc(RT_OFFSETOF(RTPATHGLOBENTRY, szPath[cchPath + cchName + 1]));
1558 if (pEntry)
1559 {
1560 pEntry->uType = uType;
1561 pEntry->cchPath = (uint16_t)(cchPath + cchName);
1562 memcpy(pEntry->szPath, pGlob->szPath, cchPath);
1563 memcpy(&pEntry->szPath[cchPath], pchName, cchName);
1564 pEntry->szPath[cchPath + cchName] = '\0';
1565
1566 pEntry->pNext = NULL;
1567 *pGlob->ppNext = pEntry;
1568 /* Note! We don't update ppNext here, that is done in rtPathGlobCommitResult. */
1569
1570 if (!(pGlob->fFlags & RTPATHGLOB_F_FIRST_ONLY))
1571 return VINF_SUCCESS;
1572 return VINF_CALLBACK_RETURN;
1573 }
1574 return VERR_NO_MEMORY;
1575 }
1576 return VERR_TOO_MUCH_DATA;
1577}
1578
1579
1580/**
1581 * Commits a pending result from rtPathGlobAlmostAddResult.
1582 *
1583 * @param pGlob The glob instance data.
1584 * @param uType The RTDIRENTRYTYPE value.
1585 */
1586static void rtPathGlobCommitResult(PRTPATHGLOB pGlob, uint8_t uType)
1587{
1588 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1589 AssertPtr(pEntry);
1590 pEntry->uType = uType;
1591 pGlob->ppNext = &pEntry->pNext;
1592 pGlob->cResults++;
1593}
1594
1595
1596/**
1597 * Rolls back a pending result from rtPathGlobAlmostAddResult.
1598 *
1599 * @param pGlob The glob instance data.
1600 */
1601static void rtPathGlobRollbackResult(PRTPATHGLOB pGlob)
1602{
1603 PRTPATHGLOBENTRY pEntry = *pGlob->ppNext;
1604 AssertPtr(pEntry);
1605 RTMemFree(pEntry);
1606 *pGlob->ppNext = NULL;
1607}
1608
1609
1610
1611/**
1612 * Whether to call rtPathGlobExecRecursiveVarExp for the next component.
1613 *
1614 * @returns true / false.
1615 * @param pGlob The glob instance data.
1616 * @param offPath The next path offset/length.
1617 * @param iComp The next component.
1618 */
1619DECLINLINE(bool) rtPathGlobExecIsExpVar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1620{
1621 return pGlob->aComps[iComp].fExpVariable
1622 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1623 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1624}
1625
1626/**
1627 * Whether to call rtPathGlobExecRecursivePlainText for the next component.
1628 *
1629 * @returns true / false.
1630 * @param pGlob The glob instance data.
1631 * @param offPath The next path offset/length.
1632 * @param iComp The next component.
1633 */
1634DECLINLINE(bool) rtPathGlobExecIsPlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1635{
1636 return pGlob->aComps[iComp].fPlain
1637 && ( !(pGlob->fFlags & RTPATHGLOB_F_IGNORE_CASE)
1638 || (offPath ? !RTFsIsCaseSensitive(pGlob->szPath) : !RTFsIsCaseSensitive(".")) );
1639}
1640
1641
1642/**
1643 * Helper for rtPathGlobExecRecursiveVarExp and rtPathGlobExecRecursivePlainText
1644 * that compares a file mode mask with dir/no-dir wishes of the caller.
1645 *
1646 * @returns true if match, false if not.
1647 * @param pGlob The glob instance data.
1648 * @param fMode The file mode (only the type is used).
1649 */
1650DECLINLINE(bool) rtPathGlobExecIsMatchFinalWithFileMode(PRTPATHGLOB pGlob, RTFMODE fMode)
1651{
1652 if (!(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)))
1653 return true;
1654 return RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS) == RTFS_IS_DIRECTORY(fMode);
1655}
1656
1657
1658/**
1659 * Recursive globbing - star-star mode.
1660 *
1661 * @returns IPRT status code.
1662 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1663 *
1664 * @param pGlob The glob instance data.
1665 * @param offPath The current path offset/length.
1666 * @param iStarStarComp The star-star component index.
1667 * @param offStarStarPath The offset of the star-star component in the
1668 * pattern path.
1669 */
1670DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveStarStar(PRTPATHGLOB pGlob, size_t offPath, uint32_t iStarStarComp,
1671 size_t offStarStarPath)
1672{
1673 /** @todo implement multi subdir matching. */
1674 RT_NOREF_PV(pGlob);
1675 RT_NOREF_PV(offPath);
1676 RT_NOREF_PV(iStarStarComp);
1677 RT_NOREF_PV(offStarStarPath);
1678 return VERR_PATH_MATCH_FEATURE_NOT_IMPLEMENTED;
1679}
1680
1681
1682
1683/**
1684 * Recursive globbing - variable expansion optimization.
1685 *
1686 * @returns IPRT status code.
1687 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1688 *
1689 * @param pGlob The glob instance data.
1690 * @param offPath The current path offset/length.
1691 * @param iComp The current component.
1692 */
1693DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveVarExp(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1694{
1695 Assert(iComp < pGlob->pParsed->cComps);
1696 Assert(pGlob->szPath[offPath] == '\0');
1697 Assert(pGlob->aComps[iComp].fExpVariable);
1698 Assert(!pGlob->aComps[iComp].fPlain);
1699 Assert(!pGlob->aComps[iComp].fStarStar);
1700 Assert(rtPathGlobExecIsExpVar(pGlob, offPath, iComp));
1701
1702 /*
1703 * Fish the variable index out of the first matching instruction.
1704 */
1705 Assert( pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1706 == RTPATHMATCHOP_VARIABLE_VALUE_CMP
1707 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1708 == RTPATHMATCHOP_VARIABLE_VALUE_ICMP);
1709 uint16_t const iVar = pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].uOp2;
1710
1711 /*
1712 * Enumerate all the variable, giving them the plain text treatment.
1713 */
1714 for (uint32_t iItem = 0; iItem < RTPATHMATCH_MAX_VAR_ITEMS; iItem++)
1715 {
1716 size_t cch;
1717 int rcVar = g_aVariables[iVar].pfnQuery(iItem, &pGlob->szPath[offPath], sizeof(pGlob->szPath) - offPath, &cch,
1718 &pGlob->MatchCache);
1719 if (RT_SUCCESS(rcVar))
1720 {
1721 Assert(pGlob->szPath[offPath + cch] == '\0');
1722
1723 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1724 if (RT_SUCCESS(rc))
1725 {
1726 if (pGlob->aComps[iComp].fFinal)
1727 {
1728 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1729 {
1730 rc = rtPathGlobAddResult(pGlob, cch,
1731 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1732 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1733 if (rc != VINF_SUCCESS)
1734 return rc;
1735 }
1736 }
1737 else if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1738 {
1739 Assert(pGlob->aComps[iComp].fDir);
1740 cch = RTPathEnsureTrailingSeparator(pGlob->szPath, sizeof(pGlob->szPath));
1741 if (cch > 0)
1742 {
1743 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1744 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1745 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1746 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1747 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1748 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1749 else
1750 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1751 if (rc != VINF_SUCCESS)
1752 return rc;
1753 }
1754 else
1755 pGlob->cPathOverflows++;
1756 }
1757 }
1758 /* else: file doesn't exist or something else is wrong, ignore this. */
1759 if (rcVar == VINF_EOF)
1760 return VINF_SUCCESS;
1761 }
1762 else if (rcVar == VERR_EOF)
1763 return VINF_SUCCESS;
1764 else if (rcVar != VERR_TRY_AGAIN)
1765 {
1766 Assert(rcVar == VERR_BUFFER_OVERFLOW);
1767 pGlob->cPathOverflows++;
1768 }
1769 }
1770 AssertFailedReturn(VINF_SUCCESS); /* Too many items returned, probably buggy query method. */
1771}
1772
1773
1774/**
1775 * Recursive globbing - plain text optimization.
1776 *
1777 * @returns IPRT status code.
1778 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1779 *
1780 * @param pGlob The glob instance data.
1781 * @param offPath The current path offset/length.
1782 * @param iComp The current component.
1783 */
1784DECL_NO_INLINE(static, int) rtPathGlobExecRecursivePlainText(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1785{
1786 /*
1787 * Instead of recursing, we loop thru adjacent plain text components.
1788 */
1789 for (;;)
1790 {
1791 /*
1792 * Preconditions.
1793 */
1794 Assert(iComp < pGlob->pParsed->cComps);
1795 Assert(pGlob->szPath[offPath] == '\0');
1796 Assert(pGlob->aComps[iComp].fPlain);
1797 Assert(!pGlob->aComps[iComp].fExpVariable);
1798 Assert(!pGlob->aComps[iComp].fStarStar);
1799 Assert(rtPathGlobExecIsPlainText(pGlob, offPath, iComp));
1800 Assert(pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1801 == RTPATHMATCHOP_STRCMP
1802 || pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg].enmOpCode
1803 == RTPATHMATCHOP_STRICMP);
1804
1805 /*
1806 * Add the plain text component to the path.
1807 */
1808 size_t const cch = pGlob->pParsed->aComps[iComp].cch;
1809 if (cch + pGlob->aComps[iComp].fDir < sizeof(pGlob->szPath) - offPath)
1810 {
1811 memcpy(&pGlob->szPath[offPath], &pGlob->pszPattern[pGlob->pParsed->aComps[iComp].off], cch);
1812 offPath += cch;
1813 pGlob->szPath[offPath] = '\0';
1814
1815 /*
1816 * Check if it exists.
1817 */
1818 int rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
1819 if (RT_SUCCESS(rc))
1820 {
1821 if (pGlob->aComps[iComp].fFinal)
1822 {
1823 if (rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
1824 return rtPathGlobAddResult(pGlob, offPath,
1825 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK)
1826 >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
1827 break;
1828 }
1829
1830 if (RTFS_IS_DIRECTORY(pGlob->u.ObjInfo.Attr.fMode))
1831 {
1832 Assert(pGlob->aComps[iComp].fDir);
1833 pGlob->szPath[offPath++] = RTPATH_SLASH;
1834 pGlob->szPath[offPath] = '\0';
1835
1836 iComp++;
1837 if (rtPathGlobExecIsExpVar(pGlob, offPath, iComp))
1838 return rtPathGlobExecRecursiveVarExp(pGlob, offPath, iComp);
1839 if (!rtPathGlobExecIsPlainText(pGlob, offPath, iComp))
1840 return rtPathGlobExecRecursiveGeneric(pGlob, offPath, iComp);
1841 if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1842 return rtPathGlobExecRecursiveStarStar(pGlob, offPath, iComp, offPath);
1843
1844 /* Continue with the next plain text component. */
1845 continue;
1846 }
1847 }
1848 /* else: file doesn't exist or something else is wrong, ignore this. */
1849 }
1850 else
1851 pGlob->cPathOverflows++;
1852 break;
1853 }
1854 return VINF_SUCCESS;
1855}
1856
1857
1858/**
1859 * Recursive globbing - generic.
1860 *
1861 * @returns IPRT status code.
1862 * @retval VINF_CALLBACK_RETURN is used to implement RTPATHGLOB_F_FIRST_ONLY.
1863 *
1864 * @param pGlob The glob instance data.
1865 * @param offPath The current path offset/length.
1866 * @param iComp The current component.
1867 */
1868DECL_NO_INLINE(static, int) rtPathGlobExecRecursiveGeneric(PRTPATHGLOB pGlob, size_t offPath, uint32_t iComp)
1869{
1870 /*
1871 * Enumerate entire directory and match each entry.
1872 */
1873 PRTDIR hDir;
1874 int rc = RTDirOpen(&hDir, offPath ? pGlob->szPath : ".");
1875 if (RT_SUCCESS(rc))
1876 {
1877 for (;;)
1878 {
1879 size_t cch = sizeof(pGlob->u);
1880 rc = RTDirRead(hDir, &pGlob->u.DirEntry, &cch);
1881 if (RT_SUCCESS(rc))
1882 {
1883 if (pGlob->aComps[iComp].fFinal)
1884 {
1885 /*
1886 * Final component: Check if it matches the current pattern.
1887 */
1888 if ( !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS))
1889 || RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1890 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY)
1891 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1892 {
1893 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1894 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1895 &pGlob->MatchCache);
1896 if (RT_SUCCESS(rc))
1897 {
1898 /* Construct the result. */
1899 if ( pGlob->u.DirEntry.enmType != RTDIRENTRYTYPE_UNKNOWN
1900 || !(pGlob->fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) )
1901 rc = rtPathGlobAddResult2(pGlob, offPath, pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1902 (uint8_t)pGlob->u.DirEntry.enmType);
1903 else
1904 {
1905 rc = rtPathGlobAlmostAddResult(pGlob, offPath,
1906 pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1907 (uint8_t)RTDIRENTRYTYPE_UNKNOWN);
1908 if (RT_SUCCESS(rc))
1909 {
1910 RTDirQueryUnknownType((*pGlob->ppNext)->szPath, false /*fFollowSymlinks*/,
1911 &pGlob->u.DirEntry.enmType);
1912 if ( RT_BOOL(pGlob->fFlags & RTPATHGLOB_F_ONLY_DIRS)
1913 == (pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY))
1914 rtPathGlobCommitResult(pGlob, (uint8_t)pGlob->u.DirEntry.enmType);
1915 else
1916 rtPathGlobRollbackResult(pGlob);
1917 }
1918 }
1919 if (rc != VINF_SUCCESS)
1920 break;
1921 }
1922 else
1923 {
1924 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1925 rc = VINF_SUCCESS;
1926 }
1927 }
1928 }
1929 /*
1930 * Intermediate component: Directories only.
1931 */
1932 else if ( pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_DIRECTORY
1933 || pGlob->u.DirEntry.enmType == RTDIRENTRYTYPE_UNKNOWN)
1934 {
1935 rc = rtPathMatchExec(pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName,
1936 &pGlob->MatchInstrAlloc.paInstructions[pGlob->aComps[iComp].iMatchProg],
1937 &pGlob->MatchCache);
1938 if (RT_SUCCESS(rc))
1939 {
1940 /* Recurse down into the alleged directory. */
1941 cch = offPath + pGlob->u.DirEntry.cbName;
1942 if (cch + 1 < sizeof(pGlob->szPath))
1943 {
1944 memcpy(&pGlob->szPath[offPath], pGlob->u.DirEntry.szName, pGlob->u.DirEntry.cbName);
1945 pGlob->szPath[cch++] = RTPATH_SLASH;
1946 pGlob->szPath[cch] = '\0';
1947
1948 if (rtPathGlobExecIsExpVar(pGlob, cch, iComp + 1))
1949 rc = rtPathGlobExecRecursiveVarExp(pGlob, cch, iComp + 1);
1950 else if (rtPathGlobExecIsPlainText(pGlob, cch, iComp + 1))
1951 rc = rtPathGlobExecRecursivePlainText(pGlob, cch, iComp + 1);
1952 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
1953 rc = rtPathGlobExecRecursiveStarStar(pGlob, cch, iComp + 1, cch);
1954 else
1955 rc = rtPathGlobExecRecursiveGeneric(pGlob, cch, iComp + 1);
1956 if (rc != VINF_SUCCESS)
1957 return rc;
1958 }
1959 else
1960 pGlob->cPathOverflows++;
1961 }
1962 else
1963 {
1964 AssertMsgBreak(rc == VERR_MISMATCH, ("%Rrc\n", rc));
1965 rc = VINF_SUCCESS;
1966 }
1967 }
1968 }
1969 /*
1970 * RTDirRead failure.
1971 */
1972 else
1973 {
1974 /* The end? */
1975 if (rc == VERR_NO_MORE_FILES)
1976 rc = VINF_SUCCESS;
1977 /* Try skip the entry if we end up with an overflow (szPath can't hold it either then). */
1978 else if (rc == VERR_BUFFER_OVERFLOW)
1979 {
1980 pGlob->cPathOverflows++;
1981 rc = rtPathGlobSkipDirEntry(hDir, cch);
1982 if (RT_SUCCESS(rc))
1983 continue;
1984 }
1985 /* else: Any other error is unexpected and should be reported. */
1986 break;
1987 }
1988 }
1989
1990 RTDirClose(hDir);
1991 }
1992 /* Directory doesn't exist or something else is wrong, ignore this. */
1993 else
1994 rc = VINF_SUCCESS;
1995 return rc;
1996}
1997
1998
1999/**
2000 * Executes a glob search.
2001 *
2002 * @returns IPRT status code.
2003 * @param pGlob The glob instance data.
2004 */
2005static int rtPathGlobExec(PRTPATHGLOB pGlob)
2006{
2007 Assert(pGlob->offFirstPath < sizeof(pGlob->szPath));
2008 Assert(pGlob->szPath[pGlob->offFirstPath] == '\0');
2009
2010 int rc;
2011 if (RT_LIKELY(pGlob->iFirstComp < pGlob->pParsed->cComps))
2012 {
2013 /*
2014 * Call the appropriate function.
2015 */
2016 if (rtPathGlobExecIsExpVar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2017 rc = rtPathGlobExecRecursiveVarExp(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2018 else if (rtPathGlobExecIsPlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp))
2019 rc = rtPathGlobExecRecursivePlainText(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2020 else if (pGlob->aComps[pGlob->iFirstComp].fStarStar)
2021 rc = rtPathGlobExecRecursiveStarStar(pGlob, pGlob->offFirstPath, pGlob->iFirstComp, pGlob->offFirstPath);
2022 else
2023 rc = rtPathGlobExecRecursiveGeneric(pGlob, pGlob->offFirstPath, pGlob->iFirstComp);
2024 }
2025 else
2026 {
2027 /*
2028 * Special case where we only have a root component or tilde expansion.
2029 */
2030 Assert(pGlob->offFirstPath > 0);
2031 rc = RTPathQueryInfoEx(pGlob->szPath, &pGlob->u.ObjInfo, RTFSOBJATTRADD_NOTHING, RTPATH_F_FOLLOW_LINK);
2032 if ( RT_SUCCESS(rc)
2033 && rtPathGlobExecIsMatchFinalWithFileMode(pGlob, pGlob->u.ObjInfo.Attr.fMode))
2034 rc = rtPathGlobAddResult(pGlob, pGlob->offFirstPath,
2035 (pGlob->u.ObjInfo.Attr.fMode & RTFS_TYPE_MASK) >> RTFS_TYPE_DIRENTRYTYPE_SHIFT);
2036 else
2037 rc = VINF_SUCCESS;
2038 }
2039
2040 /*
2041 * Adjust the status code. Check for results, hide RTPATHGLOB_F_FIRST_ONLY
2042 * status code, and add warning if necessary.
2043 */
2044 if (pGlob->cResults > 0)
2045 {
2046 if (rc == VINF_CALLBACK_RETURN)
2047 rc = VINF_SUCCESS;
2048 if (rc == VINF_SUCCESS)
2049 {
2050 if (pGlob->cPathOverflows > 0)
2051 rc = VINF_BUFFER_OVERFLOW;
2052 }
2053 }
2054 else
2055 rc = VERR_FILE_NOT_FOUND;
2056
2057 return rc;
2058}
2059
2060
2061RTDECL(int) RTPathGlob(const char *pszPattern, uint32_t fFlags, PPCRTPATHGLOBENTRY ppHead, uint32_t *pcResults)
2062{
2063 /*
2064 * Input validation.
2065 */
2066 AssertPtrReturn(ppHead, VERR_INVALID_POINTER);
2067 *ppHead = NULL;
2068 if (pcResults)
2069 {
2070 AssertPtrReturn(pcResults, VERR_INVALID_POINTER);
2071 *pcResults = 0;
2072 }
2073 AssertPtrReturn(pszPattern, VERR_INVALID_POINTER);
2074 AssertReturn(!(fFlags & ~RTPATHGLOB_F_MASK), VERR_INVALID_FLAGS);
2075 AssertReturn((fFlags & (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS)) != (RTPATHGLOB_F_NO_DIRS | RTPATHGLOB_F_ONLY_DIRS),
2076 VERR_INVALID_FLAGS);
2077
2078 /*
2079 * Parse the path.
2080 */
2081 size_t cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[1]); /** @todo 16 after testing */
2082 PRTPATHPARSED pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2083 AssertReturn(pParsed, VERR_NO_MEMORY);
2084 int rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2085 if (rc == VERR_BUFFER_OVERFLOW)
2086 {
2087 cbParsed = RT_OFFSETOF(RTPATHPARSED, aComps[pParsed->cComps + 1]);
2088 RTMemTmpFree(pParsed);
2089 pParsed = (PRTPATHPARSED)RTMemTmpAlloc(cbParsed);
2090 AssertReturn(pParsed, VERR_NO_MEMORY);
2091
2092 rc = RTPathParse(pszPattern, pParsed, cbParsed, RTPATH_STR_F_STYLE_HOST);
2093 }
2094 if (RT_SUCCESS(rc))
2095 {
2096 /*
2097 * Check dir slash vs. only/not dir flag.
2098 */
2099 if ( !(fFlags & RTPATHGLOB_F_NO_DIRS)
2100 || ( !(pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2101 && ( !(pParsed->fProps & (RTPATH_PROP_ROOT_SLASH | RTPATH_PROP_UNC))
2102 || pParsed->cComps > 1) ) )
2103 {
2104 if (pParsed->fProps & RTPATH_PROP_DIR_SLASH)
2105 fFlags |= RTPATHGLOB_F_ONLY_DIRS;
2106
2107 /*
2108 * Allocate and initialize the glob state data structure.
2109 */
2110 size_t cbGlob = RT_OFFSETOF(RTPATHGLOB, aComps[pParsed->cComps + 1]);
2111 PRTPATHGLOB pGlob = (PRTPATHGLOB)RTMemTmpAllocZ(cbGlob);
2112 if (pGlob)
2113 {
2114 pGlob->pszPattern = pszPattern;
2115 pGlob->fFlags = fFlags;
2116 pGlob->pParsed = pParsed;
2117 pGlob->ppNext = &pGlob->pHead;
2118 rc = rtPathGlobParse(pGlob, pszPattern, pParsed, fFlags);
2119 if (RT_SUCCESS(rc))
2120 {
2121 /*
2122 * Execute the search.
2123 */
2124 rc = rtPathGlobExec(pGlob);
2125 if (RT_SUCCESS(rc))
2126 {
2127 *ppHead = pGlob->pHead;
2128 if (pcResults)
2129 *pcResults = pGlob->cResults;
2130 }
2131 else
2132 RTPathGlobFree(pGlob->pHead);
2133 }
2134
2135 RTMemTmpFree(pGlob->MatchInstrAlloc.paInstructions);
2136 RTMemTmpFree(pGlob);
2137 }
2138 else
2139 rc = VERR_NO_MEMORY;
2140 }
2141 else
2142 rc = VERR_NOT_FOUND;
2143 }
2144 RTMemTmpFree(pParsed);
2145 return rc;
2146
2147
2148}
2149
2150
2151RTDECL(void) RTPathGlobFree(PCRTPATHGLOBENTRY pHead)
2152{
2153 PRTPATHGLOBENTRY pCur = (PRTPATHGLOBENTRY)pHead;
2154 while (pCur)
2155 {
2156 PRTPATHGLOBENTRY pNext = pCur->pNext;
2157 pCur->pNext = NULL;
2158 RTMemFree(pCur);
2159 pCur = pNext;
2160 }
2161}
2162
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette