summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
author tv <tv@krebsco.de> 2021-01-07 21:00:04 +0100
committer tv <tv@krebsco.de> 2021-01-07 21:00:04 +0100
commit 5b91fd162e20c054075f4535f69b6e51a8bdb952 (patch)
tree 91b9a881e48bef2ce0194dd825d9e6f08a6643e6 /lib
parent fc951fa8b4950a67c55e1ab27ffd3e4039851ef1 (diff)
lib.uri: add {native,posix-extended}-regex
Diffstat (limited to 'lib')
-rw-r--r-- lib/default.nix 1
-rw-r--r-- lib/uri.nix 77
2 files changed, 78 insertions, 0 deletions
diff --git a/lib/default.nix b/lib/default.nix
index be9f60f..2efeec0 100644
--- a/lib/default.nix
+++ b/lib/default.nix
@@ -12,6 +12,7 @@ let
encodeName = replaceChars ["/"] ["\\x2f"];
};
types = nixpkgs-lib.types // import ./types.nix { inherit lib; };
+ uri = import ./uri.nix { inherit lib; };
xml = import ./xml.nix { inherit lib; };
eq = x: y: x == y;
diff --git a/lib/uri.nix b/lib/uri.nix
new file mode 100644
index 0000000..72ad390
--- /dev/null
+++ b/lib/uri.nix
@@ -0,0 +1,77 @@
+{ lib }:
+with lib;
+with builtins;
+rec {
+ # Regular expression matching a URI as defined by RFC 3986.
+ # Source: http://jmrware.com/articles/2009/uri_regexp/URI_regex.html#uri-40
+ #
+ # The pattern is written in a commented, spaced-out form: it contains
+ # spaces used purely for layout, trailing `#` comments, non-capturing
+ # groups `(?:...)` and `\-` escapes inside character classes.
+ # posix-extended-regex below derives a variant in which those constructs
+ # are removed or rewritten.
+ native-regex = ''
+ # RFC-3986 URI component: URI
+ [A-Za-z][A-Za-z0-9+\-.]* : # scheme ":"
+ (?: // # hier-part
+ (?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})* @)?
+ (?:
+ \[
+ (?:
+ (?:
+ (?: (?:[0-9A-Fa-f]{1,4}:){6}
+ | :: (?:[0-9A-Fa-f]{1,4}:){5}
+ | (?: [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){4}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,1} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){3}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,2} [0-9A-Fa-f]{1,4})? :: (?:[0-9A-Fa-f]{1,4}:){2}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,3} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}:
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,4} [0-9A-Fa-f]{1,4})? ::
+ ) (?:
+ [0-9A-Fa-f]{1,4} : [0-9A-Fa-f]{1,4}
+ | (?: (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) \.){3}
+ (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+ )
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,5} [0-9A-Fa-f]{1,4})? :: [0-9A-Fa-f]{1,4}
+ | (?: (?:[0-9A-Fa-f]{1,4}:){0,6} [0-9A-Fa-f]{1,4})? ::
+ )
+ | [Vv][0-9A-Fa-f]+\.[A-Za-z0-9\-._~!$&'()*+,;=:]+
+ )
+ \]
+ | (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}
+ (?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+ | (?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*
+ )
+ (?: : [0-9]* )?
+ (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+ | /
+ (?: (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
+ (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+ )?
+ | (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+
+ (?:/ (?:[A-Za-z0-9\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})* )*
+ |
+ )
+ (?:\? (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "?" query ]
+ (?:\# (?:[A-Za-z0-9\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})* )? # [ "#" fragment ]
+ '';
+
+ # Derived from native-regex by transforming each of its lines and joining
+ # the transformed lines without a separator.
+ posix-extended-regex =
+   let
+     # Keep only the part of a line before the first `#` that is not
+     # written as `\#`; a `\#` sequence does not start a comment.
+     stripComment = line:
+       head (match "^((\\\\#|[^#])*)(#.*)?$" line);
+
+     # Rewrite a `\-` that is followed by at least one character other
+     # than `]` and then a `]`: drop the backslash and place the `-`
+     # directly in front of that `]`.  Repeats until nothing matches.
+     dashToClassEnd = line:
+       let
+         parts = match "(.*)\\\\-([^]]+)(].*)" line;
+       in
+         if parts == null then
+           line
+         else
+           dashToClassEnd
+             (elemAt parts 0 + elemAt parts 1 + "-" + elemAt parts 2);
+
+     # Turn non-capturing groups into plain groups.
+     plainGroups = replaceStrings ["(?:"] ["("];
+
+     # Remove every space character.
+     dropSpaces = replaceStrings [" "] [""];
+
+     # Per-line pipeline; the innermost call runs first.
+     convertLine = line:
+       dropSpaces (plainGroups (dashToClassEnd (stripComment line)));
+   in
+     concatMapStrings convertLine (splitString "\n" native-regex);
+}