• R/O
  • SSH
  • HTTPS

yash: Commit


Commit MetaInfo

Révision 4156 (tree)
l'heure 2020-12-05 21:49:54
Auteur magicant

Message de Log

Regex-specific escaping (#39094)

Change Summary

Modification

--- yash/trunk/NEWS (revision 4155)
+++ yash/trunk/NEWS (revision 4156)
@@ -28,6 +28,9 @@
2828 shell is now always implicitly redirected to /dev/null,
2929 regardless of whether the standard input has already been
3030 redirected.
31+ = Quoted characters are now handled in (almost) the same way as
32+ Bash in the regular expression in the "[[ word =~ regex ]]"
33+ syntax.
3134 * The "command" built-in with the -v or -V option was printing
3235 the pathnames of external commands with a redundant leading slash
3336 when the current working directory is "/" or "//".
@@ -45,7 +48,7 @@
4548 character.
4649 * When there are no positional parameters, the nested expansion
4750 "${{@}}" now expands to nothing rather than one empty field.
48- * Unquoted parentheses and vertical bars now can be used in a
51+ * Unquoted parentheses and vertical bars now can be used in the
4952 regular expression in the "[[ word =~ regex ]]" syntax.
5053
5154 ----------------------------------------------------------------------
--- yash/trunk/builtins/test.c (revision 4155)
+++ yash/trunk/builtins/test.c (revision 4156)
@@ -97,6 +97,8 @@
9797 __attribute__((nonnull));
9898 static wchar_t *quote_removal_for_regex(const wchar_t *s, const char *cc)
9999 __attribute__((nonnull,malloc,warn_unused_result));
100+static const wchar_t *skip_bracket(const wchar_t *s)
101+ __attribute__((nonnull,pure,warn_unused_result));
100102 #endif
101103
102104
@@ -871,19 +873,78 @@
871873 * string. */
872874 wchar_t *quote_removal_for_regex(const wchar_t *s, const char *cc)
873875 {
874- xwcsbuf_T result;
875- wb_initwithmax(&result, mul(wcslen(s), 2));
876+ /* First, remove quotations. */
877+ xwcsbuf_T tmp;
878+ xstrbuf_T tmpcc;
879+ size_t sizehint = wcslen(s);
880+ wb_initwithmax(&tmp, sizehint);
881+ sb_initwithmax(&tmpcc, sizehint);
876882 for (size_t i = 0; s[i] != L'\0'; i++) {
877883 if (cc[i] & CC_QUOTATION)
878884 continue;
879- if (cc[i] & CC_QUOTED)
880- if (wcschr(L"^.[$()|*+?{\\", s[i]) != NULL)
885+ wb_wccat(&tmp, s[i]);
886+ sb_ccat(&tmpcc, cc[i]);
887+ }
888+
889+ /* Next, escape quoted special chars outside brackets */
890+ xwcsbuf_T result;
891+ wb_initwithmax(&result, sizehint);
892+ for (size_t i = 0; tmp.contents[i] != L'\0'; ) {
893+ if (tmpcc.contents[i] & CC_QUOTED) {
894+ if (wcschr(L"^.[$()|*+?{\\", tmp.contents[i]) != NULL)
881895 wb_wccat(&result, L'\\');
882- wb_wccat(&result, s[i]);
896+ wb_wccat(&result, tmp.contents[i++]);
897+ } else {
898+ if (tmp.contents[i] != L'[') {
899+ wb_wccat(&result, tmp.contents[i++]);
900+ } else {
901+ const wchar_t *s2 = skip_bracket(&tmp.contents[i]);
902+ size_t j = s2 - tmp.contents;
903+ while (i < j)
904+ wb_wccat(&result, tmp.contents[i++]);
905+ }
906+ }
883907 }
908+
909+ sb_destroy(&tmpcc);
910+ wb_destroy(&tmp);
884911 return wb_towcs(&result);
885912 }
886913
914+/* Skips a bracket expression in a regular expression pattern.
915+ * Returns a pointer to the character just after the closing L']' (or the
916+ * terminating null character). */
917+const wchar_t *skip_bracket(const wchar_t *s)
918+{
919+ assert(*s == L'[');
920+ s++;
921+
922+ if (*s == L'^')
923+ s++;
924+ if (*s == L']')
925+ s++;
926+
927+ while (*s != L'\0') {
928+ if (*s == L']')
929+ return s + 1;
930+ if (*s++ != L'[')
931+ continue;
932+
933+ switch (*s) {
934+ case L':': case L'.': case L'=': ;
935+ wchar_t end[] = { *s, L']', L'\0', };
936+ s++;
937+ const wchar_t *endp = wcsstr(s, end);
938+ if (endp == NULL)
939+ return s + wcslen(s);
940+ s = endp + 2;
941+ break;
942+ }
943+ }
944+
945+ return s;
946+}
947+
887948 #endif /* YASH_ENABLE_DOUBLE_BRACKET */
888949
889950
Afficher sur ancien navigateur de dépôt.