authentik.sources.scim.patch.lexer
from dataclasses import dataclass
from enum import Enum

from authentik.sources.scim.constants import (
    SCIM_URN_GROUP,
    SCIM_URN_SCHEMA,
    SCIM_URN_USER,
    SCIM_URN_USER_ENTERPRISE,
)


# Token types for SCIM path parsing
class TokenType(Enum):
    """Kinds of token produced by ``SCIMPathLexer``; each value equals its name."""

    # Path structure
    ATTRIBUTE = "ATTRIBUTE"
    DOT = "DOT"
    LBRACKET = "LBRACKET"
    RBRACKET = "RBRACKET"
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    # Literals
    STRING = "STRING"
    NUMBER = "NUMBER"
    BOOLEAN = "BOOLEAN"
    NULL = "NULL"
    # Comparison and logical operators
    OPERATOR = "OPERATOR"
    AND = "AND"
    OR = "OR"
    NOT = "NOT"
    # End of input
    EOF = "EOF"


@dataclass
class Token:
    """A single lexical unit: its kind, its payload and a position."""

    type: TokenType
    # NOTE(review): annotated as str, but the lexer below stores True/False for
    # BOOLEAN tokens and None for NULL tokens.
    value: str
    # The lexer in this module never passes a position, so it stays at 0.
    position: int = 0


class SCIMPathLexer:
    """Lexer for SCIM paths and filter expressions.

    ``pos`` is the index of the character under the cursor and ``current_char``
    is that character, or ``None`` once the cursor has moved past the input.
    """

    OPERATORS = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]

    def __init__(self, text: str):
        # Schema URNs that read_identifier() treats as complete identifiers.
        self.schema_urns = [
            SCIM_URN_SCHEMA,
            SCIM_URN_GROUP,
            SCIM_URN_USER,
            SCIM_URN_USER_ENTERPRISE,
        ]
        self.text = text
        self.pos = 0
        self.current_char = text[0] if len(text) > 0 else None

    def advance(self):
        """Step the cursor one character forward."""
        self.pos += 1
        if self.pos < len(self.text):
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

    def skip_whitespace(self):
        """Advance until the cursor rests on a non-whitespace character or the end."""
        while True:
            ch = self.current_char
            if not ch or not ch.isspace():
                return
            self.advance()

    def read_string(self, quote_char):
        """Consume a quoted string and return its contents without the quotes.

        A backslash makes the following character literal. A missing closing
        quote is tolerated: everything up to the end of the input is returned.
        """
        collected = []
        self.advance()  # step over the opening quote

        while self.current_char and self.current_char != quote_char:
            if self.current_char == "\\":
                self.advance()
                if not self.current_char:
                    # Trailing backslash at end of input: nothing left to take.
                    continue
            collected.append(self.current_char)
            self.advance()

        if self.current_char == quote_char:
            self.advance()  # step over the closing quote

        return "".join(collected)

    def read_number(self):
        """Consume a run of digits and dots and return it as a string.

        The text is not validated, so a run such as "1.2.3" is returned whole.
        """
        collected = []
        while True:
            ch = self.current_char
            if not ch or (not ch.isdigit() and ch != "."):
                break
            collected.append(ch)
            self.advance()
        return "".join(collected)

    def read_identifier(self):
        """Consume an identifier (attribute name, keyword or URN) and return it.

        Identifiers consist of alphanumerics plus "_", "-" and ":". A dot is
        absorbed only when it directly follows such a character and is itself
        followed by a digit (the "2.0" inside a schema URN).
        """
        ident = ""
        while True:
            ch = self.current_char
            if not ch or not (ch.isalnum() or ch in "_-:"):
                return ident
            ident += ch
            self.advance()

            # A complete schema URN ends the identifier; whatever follows is
            # treated as a sub-attribute by making the lexer emit a DOT next.
            # NOTE(review): current_char is overwritten with "." regardless of
            # the character really at pos (or the end of input).
            if ident in self.schema_urns:
                self.current_char = "."
                return ident

            following = self.pos + 1
            dot_before_digit = (
                self.current_char == "."
                and following < len(self.text)
                and self.text[following].isdigit()
            )
            if not dot_before_digit:
                continue

            # Absorb the dot and the digits after it, e.g. ".0".
            ident += "."
            self.advance()
            while self.current_char and self.current_char.isdigit():
                ident += self.current_char
                self.advance()

    def get_next_token(self) -> Token:  # noqa PLR0911
        """Return the next token, or an EOF token once the input is exhausted.

        Keywords and operators are matched case-insensitively and emitted in
        lower case; attribute names keep their original spelling. Characters
        that start no known token are skipped.
        """
        punctuation = {
            ".": TokenType.DOT,
            "[": TokenType.LBRACKET,
            "]": TokenType.RBRACKET,
            "(": TokenType.LPAREN,
            ")": TokenType.RPAREN,
        }
        keywords = {
            "true": (TokenType.BOOLEAN, True),
            "false": (TokenType.BOOLEAN, False),
            "null": (TokenType.NULL, None),
            "and": (TokenType.AND, "and"),
            "or": (TokenType.OR, "or"),
            "not": (TokenType.NOT, "not"),
        }

        while self.current_char:
            ch = self.current_char

            if ch.isspace():
                self.skip_whitespace()
                continue

            if ch in punctuation:
                self.advance()
                return Token(punctuation[ch], ch)

            if ch in "\"'":
                return Token(TokenType.STRING, self.read_string(ch))

            if ch.isdigit():
                return Token(TokenType.NUMBER, self.read_number())

            if ch.isalpha() or ch == "_":
                word = self.read_identifier()
                lowered = word.lower()
                if lowered in keywords:
                    kind, payload = keywords[lowered]
                    return Token(kind, payload)
                if lowered in self.OPERATORS:
                    return Token(TokenType.OPERATOR, lowered)
                return Token(TokenType.ATTRIBUTE, word)

            # Skip unknown characters
            self.advance()

        return Token(TokenType.EOF, "")
class
TokenType(enum.Enum):
class TokenType(Enum):
    """Kinds of token used for SCIM path parsing; each value equals its name."""

    # Path structure
    ATTRIBUTE = "ATTRIBUTE"
    DOT = "DOT"
    LBRACKET = "LBRACKET"
    RBRACKET = "RBRACKET"
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    # Literals
    STRING = "STRING"
    NUMBER = "NUMBER"
    BOOLEAN = "BOOLEAN"
    NULL = "NULL"
    # Comparison and logical operators
    OPERATOR = "OPERATOR"
    AND = "AND"
    OR = "OR"
    NOT = "NOT"
    # End of input
    EOF = "EOF"
ATTRIBUTE =
<TokenType.ATTRIBUTE: 'ATTRIBUTE'>
DOT =
<TokenType.DOT: 'DOT'>
LBRACKET =
<TokenType.LBRACKET: 'LBRACKET'>
RBRACKET =
<TokenType.RBRACKET: 'RBRACKET'>
LPAREN =
<TokenType.LPAREN: 'LPAREN'>
RPAREN =
<TokenType.RPAREN: 'RPAREN'>
STRING =
<TokenType.STRING: 'STRING'>
NUMBER =
<TokenType.NUMBER: 'NUMBER'>
BOOLEAN =
<TokenType.BOOLEAN: 'BOOLEAN'>
NULL =
<TokenType.NULL: 'NULL'>
OPERATOR =
<TokenType.OPERATOR: 'OPERATOR'>
AND =
<TokenType.AND: 'AND'>
OR =
<TokenType.OR: 'OR'>
NOT =
<TokenType.NOT: 'NOT'>
EOF =
<TokenType.EOF: 'EOF'>
@dataclass
class
Token:
Token( type: TokenType, value: str, position: int = 0)
type: TokenType
class
SCIMPathLexer:
39class SCIMPathLexer: 40 """Lexer for SCIM paths and filter expressions""" 41 42 OPERATORS = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"] 43 44 def __init__(self, text: str): 45 self.schema_urns = [ 46 SCIM_URN_SCHEMA, 47 SCIM_URN_GROUP, 48 SCIM_URN_USER, 49 SCIM_URN_USER_ENTERPRISE, 50 ] 51 self.text = text 52 self.pos = 0 53 self.current_char = self.text[self.pos] if self.pos < len(self.text) else None 54 55 def advance(self): 56 """Move to next character""" 57 self.pos += 1 58 self.current_char = self.text[self.pos] if self.pos < len(self.text) else None 59 60 def skip_whitespace(self): 61 """Skip whitespace characters""" 62 while self.current_char and self.current_char.isspace(): 63 self.advance() 64 65 def read_string(self, quote_char): 66 """Read a quoted string""" 67 value = "" 68 self.advance() # Skip opening quote 69 70 while self.current_char and self.current_char != quote_char: 71 if self.current_char == "\\": 72 self.advance() 73 if self.current_char: 74 value += self.current_char 75 self.advance() 76 else: 77 value += self.current_char 78 self.advance() 79 80 if self.current_char == quote_char: 81 self.advance() # Skip closing quote 82 83 return value 84 85 def read_number(self): 86 """Read a number (integer or float)""" 87 value = "" 88 while self.current_char and (self.current_char.isdigit() or self.current_char == "."): 89 value += self.current_char 90 self.advance() 91 return value 92 93 def read_identifier(self): 94 """Read an identifier (attribute name or operator) - supports URN format""" 95 value = "" 96 while self.current_char and (self.current_char.isalnum() or self.current_char in "_-:"): 97 value += self.current_char 98 self.advance() 99 # If the identifier value so far is a schema URN, take that as the identifier and 100 # treat the next part as a sub_attribute 101 if value in self.schema_urns: 102 self.current_char = "." 
103 return value 104 105 # Handle dots within URN identifiers (like "2.0") 106 # A dot is part of the identifier if it's followed by a digit 107 if ( 108 self.current_char == "." 109 and self.pos + 1 < len(self.text) 110 and self.text[self.pos + 1].isdigit() 111 ): 112 value += self.current_char 113 self.advance() 114 # Continue reading digits after the dot 115 while self.current_char and self.current_char.isdigit(): 116 value += self.current_char 117 self.advance() 118 119 return value 120 121 def get_next_token(self) -> Token: # noqa PLR0911 122 """Get the next token from the input""" 123 while self.current_char: 124 if self.current_char.isspace(): 125 self.skip_whitespace() 126 continue 127 128 if self.current_char == ".": 129 self.advance() 130 return Token(TokenType.DOT, ".") 131 132 if self.current_char == "[": 133 self.advance() 134 return Token(TokenType.LBRACKET, "[") 135 136 if self.current_char == "]": 137 self.advance() 138 return Token(TokenType.RBRACKET, "]") 139 140 if self.current_char == "(": 141 self.advance() 142 return Token(TokenType.LPAREN, "(") 143 144 if self.current_char == ")": 145 self.advance() 146 return Token(TokenType.RPAREN, ")") 147 148 if self.current_char in "\"'": 149 quote_char = self.current_char 150 value = self.read_string(quote_char) 151 return Token(TokenType.STRING, value) 152 153 if self.current_char.isdigit(): 154 value = self.read_number() 155 return Token(TokenType.NUMBER, value) 156 157 if self.current_char.isalpha() or self.current_char == "_": 158 value = self.read_identifier() 159 160 # Check for special keywords 161 if value.lower() == "true": 162 return Token(TokenType.BOOLEAN, True) 163 elif value.lower() == "false": 164 return Token(TokenType.BOOLEAN, False) 165 elif value.lower() == "null": 166 return Token(TokenType.NULL, None) 167 elif value.lower() == "and": 168 return Token(TokenType.AND, "and") 169 elif value.lower() == "or": 170 return Token(TokenType.OR, "or") 171 elif value.lower() == "not": 172 return 
Token(TokenType.NOT, "not") 173 elif value.lower() in self.OPERATORS: 174 return Token(TokenType.OPERATOR, value.lower()) 175 else: 176 return Token(TokenType.ATTRIBUTE, value) 177 178 # Skip unknown characters 179 self.advance() 180 181 return Token(TokenType.EOF, "")
Lexer for SCIM paths and filter expressions
def
advance(self):
55 def advance(self): 56 """Move to next character""" 57 self.pos += 1 58 self.current_char = self.text[self.pos] if self.pos < len(self.text) else None
Move to next character
def
skip_whitespace(self):
60 def skip_whitespace(self): 61 """Skip whitespace characters""" 62 while self.current_char and self.current_char.isspace(): 63 self.advance()
Skip whitespace characters
def
read_string(self, quote_char):
65 def read_string(self, quote_char): 66 """Read a quoted string""" 67 value = "" 68 self.advance() # Skip opening quote 69 70 while self.current_char and self.current_char != quote_char: 71 if self.current_char == "\\": 72 self.advance() 73 if self.current_char: 74 value += self.current_char 75 self.advance() 76 else: 77 value += self.current_char 78 self.advance() 79 80 if self.current_char == quote_char: 81 self.advance() # Skip closing quote 82 83 return value
Read a quoted string
def
read_number(self):
85 def read_number(self): 86 """Read a number (integer or float)""" 87 value = "" 88 while self.current_char and (self.current_char.isdigit() or self.current_char == "."): 89 value += self.current_char 90 self.advance() 91 return value
Read a number (integer or float)
def
read_identifier(self):
93 def read_identifier(self): 94 """Read an identifier (attribute name or operator) - supports URN format""" 95 value = "" 96 while self.current_char and (self.current_char.isalnum() or self.current_char in "_-:"): 97 value += self.current_char 98 self.advance() 99 # If the identifier value so far is a schema URN, take that as the identifier and 100 # treat the next part as a sub_attribute 101 if value in self.schema_urns: 102 self.current_char = "." 103 return value 104 105 # Handle dots within URN identifiers (like "2.0") 106 # A dot is part of the identifier if it's followed by a digit 107 if ( 108 self.current_char == "." 109 and self.pos + 1 < len(self.text) 110 and self.text[self.pos + 1].isdigit() 111 ): 112 value += self.current_char 113 self.advance() 114 # Continue reading digits after the dot 115 while self.current_char and self.current_char.isdigit(): 116 value += self.current_char 117 self.advance() 118 119 return value
Read an identifier (attribute name or operator) - supports URN format
def get_next_token(self) -> Token:  # noqa PLR0911
    """Return the next token, or an EOF token once the input is exhausted.

    Whitespace is skipped. Keywords (true, false, null, and, or, not) and the
    entries of ``OPERATORS`` are matched case-insensitively and emitted in
    lower case; BOOLEAN tokens carry a Python bool and NULL carries ``None``.
    Any other identifier becomes an ATTRIBUTE token with its original
    spelling. Characters that start no known token are skipped.
    """
    punctuation = {
        ".": TokenType.DOT,
        "[": TokenType.LBRACKET,
        "]": TokenType.RBRACKET,
        "(": TokenType.LPAREN,
        ")": TokenType.RPAREN,
    }
    keywords = {
        "true": (TokenType.BOOLEAN, True),
        "false": (TokenType.BOOLEAN, False),
        "null": (TokenType.NULL, None),
        "and": (TokenType.AND, "and"),
        "or": (TokenType.OR, "or"),
        "not": (TokenType.NOT, "not"),
    }

    while self.current_char:
        ch = self.current_char

        if ch.isspace():
            self.skip_whitespace()
            continue

        if ch in punctuation:
            self.advance()
            return Token(punctuation[ch], ch)

        if ch in "\"'":
            return Token(TokenType.STRING, self.read_string(ch))

        if ch.isdigit():
            return Token(TokenType.NUMBER, self.read_number())

        if ch.isalpha() or ch == "_":
            word = self.read_identifier()
            lowered = word.lower()
            # Keywords take precedence over operators, then plain attributes.
            if lowered in keywords:
                kind, payload = keywords[lowered]
                return Token(kind, payload)
            if lowered in self.OPERATORS:
                return Token(TokenType.OPERATOR, lowered)
            return Token(TokenType.ATTRIBUTE, word)

        # Skip unknown characters
        self.advance()

    return Token(TokenType.EOF, "")
Get the next token from the input