"""Tests for the SCIM path lexer (authentik.sources.scim.patch.lexer)."""

from unittest import TestCase

from authentik.sources.scim.constants import (
    SCIM_URN_GROUP,
    SCIM_URN_SCHEMA,
    SCIM_URN_USER,
    SCIM_URN_USER_ENTERPRISE,
)
from authentik.sources.scim.patch.lexer import SCIMPathLexer, Token, TokenType


class TestTokenType(TestCase):
    """Test TokenType enum"""

    def test_token_type_values(self):
        """Test that all token types have correct values"""
        self.assertEqual(TokenType.ATTRIBUTE.value, "ATTRIBUTE")
        self.assertEqual(TokenType.DOT.value, "DOT")
        self.assertEqual(TokenType.LBRACKET.value, "LBRACKET")
        self.assertEqual(TokenType.RBRACKET.value, "RBRACKET")
        self.assertEqual(TokenType.LPAREN.value, "LPAREN")
        self.assertEqual(TokenType.RPAREN.value, "RPAREN")
        self.assertEqual(TokenType.STRING.value, "STRING")
        self.assertEqual(TokenType.NUMBER.value, "NUMBER")
        self.assertEqual(TokenType.BOOLEAN.value, "BOOLEAN")
        self.assertEqual(TokenType.NULL.value, "NULL")
        self.assertEqual(TokenType.OPERATOR.value, "OPERATOR")
        self.assertEqual(TokenType.AND.value, "AND")
        self.assertEqual(TokenType.OR.value, "OR")
        self.assertEqual(TokenType.NOT.value, "NOT")
        self.assertEqual(TokenType.EOF.value, "EOF")


class TestToken(TestCase):
    """Test Token dataclass"""

    def test_token_creation(self):
        """Test token creation with all parameters"""
        token = Token(TokenType.ATTRIBUTE, "userName", 5)
        self.assertEqual(token.type, TokenType.ATTRIBUTE)
        self.assertEqual(token.value, "userName")
        self.assertEqual(token.position, 5)

    def test_token_creation_default_position(self):
        """Test token creation with default position"""
        token = Token(TokenType.DOT, ".")
        self.assertEqual(token.type, TokenType.DOT)
        self.assertEqual(token.value, ".")
        self.assertEqual(token.position, 0)


class TestSCIMPathLexer(TestCase):
    """Test SCIMPathLexer class"""

    def setUp(self):
        """Set up test fixtures"""
        self.simple_lexer = SCIMPathLexer("userName")

    def test_init(self):
        """Test lexer initialization"""
        lexer = SCIMPathLexer("test")
        self.assertEqual(lexer.text, "test")
        self.assertEqual(lexer.pos, 0)
        self.assertEqual(lexer.current_char, "t")
        self.assertIn(SCIM_URN_SCHEMA, lexer.schema_urns)
        self.assertIn(SCIM_URN_GROUP, lexer.schema_urns)
        self.assertIn(SCIM_URN_USER, lexer.schema_urns)
        self.assertIn(SCIM_URN_USER_ENTERPRISE, lexer.schema_urns)
        self.assertEqual(
            lexer.OPERATORS, ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
        )

    def test_init_empty_string(self):
        """Test lexer initialization with empty string"""
        lexer = SCIMPathLexer("")
        self.assertEqual(lexer.text, "")
        self.assertEqual(lexer.pos, 0)
        self.assertIsNone(lexer.current_char)

    def test_advance(self):
        """Test advance method"""
        lexer = SCIMPathLexer("abc")
        self.assertEqual(lexer.current_char, "a")

        lexer.advance()
        self.assertEqual(lexer.pos, 1)
        self.assertEqual(lexer.current_char, "b")

        lexer.advance()
        self.assertEqual(lexer.pos, 2)
        self.assertEqual(lexer.current_char, "c")

        lexer.advance()
        self.assertEqual(lexer.pos, 3)
        self.assertIsNone(lexer.current_char)

    def test_skip_whitespace(self):
        """Test skip_whitespace method"""
        lexer = SCIMPathLexer("  \t\n  abc")
        lexer.skip_whitespace()
        self.assertEqual(lexer.current_char, "a")

    def test_skip_whitespace_only_whitespace(self):
        """Test skip_whitespace with only whitespace"""
        lexer = SCIMPathLexer("  \t\n  ")
        lexer.skip_whitespace()
        self.assertIsNone(lexer.current_char)

    def test_skip_whitespace_no_whitespace(self):
        """Test skip_whitespace with no leading whitespace"""
        lexer = SCIMPathLexer("abc")
        original_pos = lexer.pos
        lexer.skip_whitespace()
        self.assertEqual(lexer.pos, original_pos)
        self.assertEqual(lexer.current_char, "a")

    def test_read_string_double_quotes(self):
        """Test reading double-quoted string"""
        lexer = SCIMPathLexer('"hello world"')
        result = lexer.read_string('"')
        self.assertEqual(result, "hello world")
        self.assertIsNone(lexer.current_char)  # Should be at end

    def test_read_string_single_quotes(self):
        """Test reading single-quoted string"""
        lexer = SCIMPathLexer("'hello world'")
        result = lexer.read_string("'")
        self.assertEqual(result, "hello world")
        self.assertIsNone(lexer.current_char)

    def test_read_string_with_escapes(self):
        """Test reading string with escape characters"""
        lexer = SCIMPathLexer('"hello \\"world\\""')
        result = lexer.read_string('"')
        self.assertEqual(result, 'hello "world"')

    def test_read_string_with_backslash_at_end(self):
        """Test reading string with backslash at end"""
        lexer = SCIMPathLexer('"hello\\"')
        result = lexer.read_string('"')
        self.assertEqual(result, 'hello"')

    def test_read_string_unclosed(self):
        """Test reading unclosed string"""
        lexer = SCIMPathLexer('"hello world')
        result = lexer.read_string('"')
        self.assertEqual(result, "hello world")
        self.assertIsNone(lexer.current_char)

    def test_read_string_empty(self):
        """Test reading empty string"""
        lexer = SCIMPathLexer('""')
        result = lexer.read_string('"')
        self.assertEqual(result, "")

    def test_read_number_integer(self):
        """Test reading integer number"""
        lexer = SCIMPathLexer("123")
        result = lexer.read_number()
        self.assertEqual(result, "123")
        self.assertIsNone(lexer.current_char)

    def test_read_number_float(self):
        """Test reading float number"""
        lexer = SCIMPathLexer("123.456")
        result = lexer.read_number()
        self.assertEqual(result, "123.456")
        self.assertIsNone(lexer.current_char)

    def test_read_number_with_multiple_dots(self):
        """Test reading number with multiple dots (invalid but handled)"""
        lexer = SCIMPathLexer("123.456.789")
        result = lexer.read_number()
        self.assertEqual(result, "123.456.789")
        self.assertIsNone(lexer.current_char)

    def test_read_number_starting_with_dot(self):
        """Test reading number starting with dot"""
        lexer = SCIMPathLexer(".123")
        result = lexer.read_number()
        self.assertEqual(result, ".123")

    def test_read_identifier_simple(self):
        """Test reading simple identifier"""
        lexer = SCIMPathLexer("userName")
        result = lexer.read_identifier()
        self.assertEqual(result, "userName")
        self.assertIsNone(lexer.current_char)

    def test_read_identifier_with_underscore(self):
        """Test reading identifier with underscore"""
        lexer = SCIMPathLexer("user_name")
        result = lexer.read_identifier()
        self.assertEqual(result, "user_name")

    def test_read_identifier_with_hyphen(self):
        """Test reading identifier with hyphen"""
        lexer = SCIMPathLexer("user-name")
        result = lexer.read_identifier()
        self.assertEqual(result, "user-name")

    def test_read_identifier_with_colon(self):
        """Test reading identifier with colon (URN format)"""
        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
        result = lexer.read_identifier()
        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")

    def test_read_identifier_schema_urn(self):
        """Test reading schema URN identifier"""
        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
        result = lexer.read_identifier()
        self.assertEqual(result, SCIM_URN_USER)
        self.assertEqual(lexer.current_char, ".")  # Should stop at dot and set current_char to dot

    def test_read_identifier_with_version_number(self):
        """Test reading identifier with version number (dots followed by digits)"""
        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
        result = lexer.read_identifier()
        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")

    def test_read_identifier_partial_urn_match(self):
        """Test reading identifier that partially matches URN"""
        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:CustomUser")
        result = lexer.read_identifier()
        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:CustomUser")

    # Test get_next_token method
    def test_get_next_token_dot(self):
        """Test tokenizing dot"""
        lexer = SCIMPathLexer(".")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.DOT)
        self.assertEqual(token.value, ".")

    def test_get_next_token_lbracket(self):
        """Test tokenizing left bracket"""
        lexer = SCIMPathLexer("[")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.LBRACKET)
        self.assertEqual(token.value, "[")

    def test_get_next_token_rbracket(self):
        """Test tokenizing right bracket"""
        lexer = SCIMPathLexer("]")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.RBRACKET)
        self.assertEqual(token.value, "]")

    def test_get_next_token_lparen(self):
        """Test tokenizing left parenthesis"""
        lexer = SCIMPathLexer("(")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.LPAREN)
        self.assertEqual(token.value, "(")

    def test_get_next_token_rparen(self):
        """Test tokenizing right parenthesis"""
        lexer = SCIMPathLexer(")")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.RPAREN)
        self.assertEqual(token.value, ")")

    def test_get_next_token_string_double_quotes(self):
        """Test tokenizing double-quoted string"""
        lexer = SCIMPathLexer('"test string"')
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.STRING)
        self.assertEqual(token.value, "test string")

    def test_get_next_token_string_single_quotes(self):
        """Test tokenizing single-quoted string"""
        lexer = SCIMPathLexer("'test string'")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.STRING)
        self.assertEqual(token.value, "test string")

    def test_get_next_token_number_integer(self):
        """Test tokenizing integer"""
        lexer = SCIMPathLexer("123")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.NUMBER)
        self.assertEqual(token.value, "123")

    def test_get_next_token_number_float(self):
        """Test tokenizing float"""
        lexer = SCIMPathLexer("123.45")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.NUMBER)
        self.assertEqual(token.value, "123.45")

    def test_get_next_token_boolean_true(self):
        """Test tokenizing boolean true"""
        lexer = SCIMPathLexer("true")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.BOOLEAN)
        self.assertTrue(token.value)

    def test_get_next_token_boolean_false(self):
        """Test tokenizing boolean false"""
        lexer = SCIMPathLexer("false")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.BOOLEAN)
        self.assertFalse(token.value)

    def test_get_next_token_boolean_case_insensitive(self):
        """Test tokenizing boolean with different cases"""
        for value in ["TRUE", "True", "FALSE", "False"]:
            with self.subTest(value=value):
                lexer = SCIMPathLexer(value)
                token = lexer.get_next_token()
                self.assertEqual(token.type, TokenType.BOOLEAN)

    def test_get_next_token_null(self):
        """Test tokenizing null"""
        lexer = SCIMPathLexer("null")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.NULL)
        self.assertIsNone(token.value)

    def test_get_next_token_null_case_insensitive(self):
        """Test tokenizing null with different cases"""
        for value in ["NULL", "Null"]:
            with self.subTest(value=value):
                lexer = SCIMPathLexer(value)
                token = lexer.get_next_token()
                self.assertEqual(token.type, TokenType.NULL)

    def test_get_next_token_and(self):
        """Test tokenizing AND operator"""
        lexer = SCIMPathLexer("and")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.AND)
        self.assertEqual(token.value, "and")

    def test_get_next_token_or(self):
        """Test tokenizing OR operator"""
        lexer = SCIMPathLexer("or")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.OR)
        self.assertEqual(token.value, "or")

    def test_get_next_token_not(self):
        """Test tokenizing NOT operator"""
        lexer = SCIMPathLexer("not")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.NOT)
        self.assertEqual(token.value, "not")

    def test_get_next_token_operators(self):
        """Test tokenizing all comparison operators"""
        operators = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
        for op in operators:
            with self.subTest(operator=op):
                lexer = SCIMPathLexer(op)
                token = lexer.get_next_token()
                self.assertEqual(token.type, TokenType.OPERATOR)
                self.assertEqual(token.value, op)

    def test_get_next_token_operators_case_insensitive(self):
        """Test tokenizing operators with different cases"""
        for op in ["EQ", "Eq", "NE", "Ne"]:
            with self.subTest(operator=op):
                lexer = SCIMPathLexer(op)
                token = lexer.get_next_token()
                self.assertEqual(token.type, TokenType.OPERATOR)
                self.assertEqual(token.value, op.lower())

    def test_get_next_token_attribute(self):
        """Test tokenizing attribute name"""
        lexer = SCIMPathLexer("userName")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.ATTRIBUTE)
        self.assertEqual(token.value, "userName")

    def test_get_next_token_attribute_with_underscore(self):
        """Test tokenizing attribute name with underscore"""
        lexer = SCIMPathLexer("_userName")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.ATTRIBUTE)
        self.assertEqual(token.value, "_userName")

    def test_get_next_token_eof(self):
        """Test tokenizing end of file"""
        lexer = SCIMPathLexer("")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.EOF)
        self.assertEqual(token.value, "")

    def test_get_next_token_with_whitespace(self):
        """Test tokenizing with leading whitespace"""
        lexer = SCIMPathLexer("   userName")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.ATTRIBUTE)
        self.assertEqual(token.value, "userName")

    def test_get_next_token_skip_unknown_characters(self):
        """Test that unknown characters are skipped"""
        lexer = SCIMPathLexer("@#$userName")
        token = lexer.get_next_token()
        self.assertEqual(token.type, TokenType.ATTRIBUTE)
        self.assertEqual(token.value, "userName")

    def test_get_next_token_multiple_tokens(self):
        """Test tokenizing multiple tokens in sequence"""
        lexer = SCIMPathLexer("userName.givenName")

        token1 = lexer.get_next_token()
        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
        self.assertEqual(token1.value, "userName")

        token2 = lexer.get_next_token()
        self.assertEqual(token2.type, TokenType.DOT)
        self.assertEqual(token2.value, ".")

        token3 = lexer.get_next_token()
        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
        self.assertEqual(token3.value, "givenName")

        token4 = lexer.get_next_token()
        self.assertEqual(token4.type, TokenType.EOF)

    def test_get_next_token_complex_filter(self):
        """Test tokenizing complex filter expression"""
        lexer = SCIMPathLexer('emails[type eq "work" and primary eq true]')

        tokens = []
        while True:
            token = lexer.get_next_token()
            tokens.append(token)
            if token.type == TokenType.EOF:
                break

        expected_types = [
            TokenType.ATTRIBUTE,  # emails
            TokenType.LBRACKET,  # [
            TokenType.ATTRIBUTE,  # type
            TokenType.OPERATOR,  # eq
            TokenType.STRING,  # "work"
            TokenType.AND,  # and
            TokenType.ATTRIBUTE,  # primary
            TokenType.OPERATOR,  # eq
            TokenType.BOOLEAN,  # true
            TokenType.RBRACKET,  # ]
            TokenType.EOF,
        ]

        self.assertEqual(len(tokens), len(expected_types))
        for token, expected_type in zip(tokens, expected_types, strict=False):
            self.assertEqual(token.type, expected_type)

    def test_get_next_token_urn_attribute(self):
        """Test tokenizing URN-based attribute"""
        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")

        token1 = lexer.get_next_token()
        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
        self.assertEqual(token1.value, SCIM_URN_USER)

        token2 = lexer.get_next_token()
        self.assertEqual(token2.type, TokenType.DOT)

        token3 = lexer.get_next_token()
        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
        self.assertEqual(token3.value, "userName")

    def test_get_next_token_enterprise_urn(self):
        """Test tokenizing enterprise URN"""
        lexer = SCIMPathLexer(f"{SCIM_URN_USER_ENTERPRISE}.manager")

        token1 = lexer.get_next_token()
        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
        self.assertEqual(token1.value, SCIM_URN_USER_ENTERPRISE)

        token2 = lexer.get_next_token()
        self.assertEqual(token2.type, TokenType.DOT)

    def test_lexer_state_after_eof(self):
        """Test lexer state after reaching EOF"""
        lexer = SCIMPathLexer("a")

        # Get first token
        token1 = lexer.get_next_token()
        self.assertEqual(token1.type, TokenType.ATTRIBUTE)

        # Get EOF token
        token2 = lexer.get_next_token()
        self.assertEqual(token2.type, TokenType.EOF)

        # Should continue returning EOF
        token3 = lexer.get_next_token()
        self.assertEqual(token3.type, TokenType.EOF)

    def test_read_identifier_edge_cases(self):
        """Test read_identifier with edge cases"""
        # Test identifier ending with colon
        lexer = SCIMPathLexer("test:")
        result = lexer.read_identifier()
        self.assertEqual(result, "test:")

        # Test identifier with numbers
        lexer = SCIMPathLexer("test123")
        result = lexer.read_identifier()
        self.assertEqual(result, "test123")

    def test_complex_urn_parsing(self):
        """Test parsing complex URN with version numbers"""
        urn = "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
        lexer = SCIMPathLexer(urn)
        result = lexer.read_identifier()
        self.assertEqual(result, urn)
edge cases""" 496 # Test identifier ending with colon 497 lexer = SCIMPathLexer("test:") 498 result = lexer.read_identifier() 499 self.assertEqual(result, "test:") 500 501 # Test identifier with numbers 502 lexer = SCIMPathLexer("test123") 503 result = lexer.read_identifier() 504 self.assertEqual(result, "test123") 505 506 def test_complex_urn_parsing(self): 507 """Test parsing complex URN with version numbers""" 508 urn = "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User" 509 lexer = SCIMPathLexer(urn) 510 result = lexer.read_identifier() 511 self.assertEqual(result, urn)
# Test SCIMPathLexer class
def test_init(self):
    """A fresh lexer starts at position zero on the first character."""
    lex = SCIMPathLexer("test")
    self.assertEqual(lex.text, "test")
    self.assertEqual(lex.pos, 0)
    self.assertEqual(lex.current_char, "t")
    # All known schema URNs must be registered on the lexer.
    for urn in (SCIM_URN_SCHEMA, SCIM_URN_GROUP, SCIM_URN_USER, SCIM_URN_USER_ENTERPRISE):
        self.assertIn(urn, lex.schema_urns)
    self.assertEqual(
        lex.OPERATORS, ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
    )

def test_init_empty_string(self):
    """An empty input leaves current_char unset."""
    lex = SCIMPathLexer("")
    self.assertEqual(lex.text, "")
    self.assertEqual(lex.pos, 0)
    self.assertIsNone(lex.current_char)

def test_advance(self):
    """advance() walks the input one character at a time, ending on None."""
    lex = SCIMPathLexer("abc")
    self.assertEqual(lex.current_char, "a")
    for expected_pos, expected_char in ((1, "b"), (2, "c"), (3, None)):
        lex.advance()
        self.assertEqual(lex.pos, expected_pos)
        self.assertEqual(lex.current_char, expected_char)

def test_skip_whitespace(self):
    """skip_whitespace() stops on the first non-space character."""
    lex = SCIMPathLexer(" \t\n abc")
    lex.skip_whitespace()
    self.assertEqual(lex.current_char, "a")

def test_skip_whitespace_only_whitespace(self):
    """All-whitespace input is consumed entirely."""
    lex = SCIMPathLexer(" \t\n ")
    lex.skip_whitespace()
    self.assertIsNone(lex.current_char)

def test_skip_whitespace_no_whitespace(self):
    """skip_whitespace() is a no-op when there is no leading whitespace."""
    lex = SCIMPathLexer("abc")
    start = lex.pos
    lex.skip_whitespace()
    self.assertEqual(lex.pos, start)
    self.assertEqual(lex.current_char, "a")
def test_read_string_double_quotes(self):
    """read_string consumes a double-quoted literal and its closing quote."""
    lex = SCIMPathLexer('"hello world"')
    self.assertEqual(lex.read_string('"'), "hello world")
    self.assertIsNone(lex.current_char)  # consumed through the end

def test_read_string_single_quotes(self):
    """read_string works identically with single quotes."""
    lex = SCIMPathLexer("'hello world'")
    self.assertEqual(lex.read_string("'"), "hello world")
    self.assertIsNone(lex.current_char)

def test_read_string_with_escapes(self):
    """Backslash-escaped quotes are unescaped in the result."""
    lex = SCIMPathLexer('"hello \\"world\\""')
    self.assertEqual(lex.read_string('"'), 'hello "world"')

def test_read_string_with_backslash_at_end(self):
    """A backslash just before the closing quote escapes it."""
    lex = SCIMPathLexer('"hello\\"')
    self.assertEqual(lex.read_string('"'), 'hello"')

def test_read_string_unclosed(self):
    """An unterminated string is read through to end of input."""
    lex = SCIMPathLexer('"hello world')
    self.assertEqual(lex.read_string('"'), "hello world")
    self.assertIsNone(lex.current_char)

def test_read_string_empty(self):
    """An empty quoted literal yields an empty string."""
    lex = SCIMPathLexer('""')
    self.assertEqual(lex.read_string('"'), "")

def test_read_number_integer(self):
    """read_number returns integer digits as a string."""
    lex = SCIMPathLexer("123")
    self.assertEqual(lex.read_number(), "123")
    self.assertIsNone(lex.current_char)

def test_read_number_float(self):
    """read_number accepts a decimal point inside the digits."""
    lex = SCIMPathLexer("123.456")
    self.assertEqual(lex.read_number(), "123.456")
    self.assertIsNone(lex.current_char)

def test_read_number_with_multiple_dots(self):
    """Multiple dots are consumed verbatim (invalid input, lenient lexer)."""
    lex = SCIMPathLexer("123.456.789")
    self.assertEqual(lex.read_number(), "123.456.789")
    self.assertIsNone(lex.current_char)

def test_read_number_starting_with_dot(self):
    """A leading dot is included in the number text."""
    lex = SCIMPathLexer(".123")
    self.assertEqual(lex.read_number(), ".123")
def test_read_identifier_simple(self):
    """read_identifier consumes a plain attribute name."""
    lex = SCIMPathLexer("userName")
    self.assertEqual(lex.read_identifier(), "userName")
    self.assertIsNone(lex.current_char)

def test_read_identifier_with_underscore(self):
    """Underscores are valid inside identifiers."""
    lex = SCIMPathLexer("user_name")
    self.assertEqual(lex.read_identifier(), "user_name")

def test_read_identifier_with_hyphen(self):
    """Hyphens are valid inside identifiers."""
    lex = SCIMPathLexer("user-name")
    self.assertEqual(lex.read_identifier(), "user-name")

def test_read_identifier_with_colon(self):
    """Colons are part of identifiers so URNs lex as one token."""
    lex = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
    self.assertEqual(lex.read_identifier(), "urn:ietf:params:scim:schemas:core:2.0:User")

def test_read_identifier_schema_urn(self):
    """A known schema URN stops at the dot before the sub-attribute."""
    lex = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
    self.assertEqual(lex.read_identifier(), SCIM_URN_USER)
    # The lexer must be left positioned on the separating dot.
    self.assertEqual(lex.current_char, ".")

def test_read_identifier_with_version_number(self):
    """Dots followed by digits (version numbers) stay in the identifier."""
    lex = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
    self.assertEqual(lex.read_identifier(), "urn:ietf:params:scim:schemas:core:2.0:User")

def test_read_identifier_partial_urn_match(self):
    """An identifier that merely resembles a known URN is read whole."""
    lex = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:CustomUser")
    self.assertEqual(lex.read_identifier(), "urn:ietf:params:scim:schemas:core:2.0:CustomUser")
def test_get_next_token_dot(self):
    """'.' lexes to a DOT token."""
    tok = SCIMPathLexer(".").get_next_token()
    self.assertEqual(tok.type, TokenType.DOT)
    self.assertEqual(tok.value, ".")

def test_get_next_token_lbracket(self):
    """'[' lexes to an LBRACKET token."""
    tok = SCIMPathLexer("[").get_next_token()
    self.assertEqual(tok.type, TokenType.LBRACKET)
    self.assertEqual(tok.value, "[")

def test_get_next_token_rbracket(self):
    """']' lexes to an RBRACKET token."""
    tok = SCIMPathLexer("]").get_next_token()
    self.assertEqual(tok.type, TokenType.RBRACKET)
    self.assertEqual(tok.value, "]")

def test_get_next_token_lparen(self):
    """'(' lexes to an LPAREN token."""
    tok = SCIMPathLexer("(").get_next_token()
    self.assertEqual(tok.type, TokenType.LPAREN)
    self.assertEqual(tok.value, "(")

def test_get_next_token_rparen(self):
    """')' lexes to an RPAREN token."""
    tok = SCIMPathLexer(")").get_next_token()
    self.assertEqual(tok.type, TokenType.RPAREN)
    self.assertEqual(tok.value, ")")

def test_get_next_token_string_double_quotes(self):
    """A double-quoted literal lexes to a STRING token."""
    tok = SCIMPathLexer('"test string"').get_next_token()
    self.assertEqual(tok.type, TokenType.STRING)
    self.assertEqual(tok.value, "test string")
271 def test_get_next_token_string_single_quotes(self): 272 """Test tokenizing single-quoted string""" 273 lexer = SCIMPathLexer("'test string'") 274 token = lexer.get_next_token() 275 self.assertEqual(token.type, TokenType.STRING) 276 self.assertEqual(token.value, "test string")
Test tokenizing single-quoted string
278 def test_get_next_token_number_integer(self): 279 """Test tokenizing integer""" 280 lexer = SCIMPathLexer("123") 281 token = lexer.get_next_token() 282 self.assertEqual(token.type, TokenType.NUMBER) 283 self.assertEqual(token.value, "123")
Test tokenizing integer
285 def test_get_next_token_number_float(self): 286 """Test tokenizing float""" 287 lexer = SCIMPathLexer("123.45") 288 token = lexer.get_next_token() 289 self.assertEqual(token.type, TokenType.NUMBER) 290 self.assertEqual(token.value, "123.45")
Test tokenizing float
292 def test_get_next_token_boolean_true(self): 293 """Test tokenizing boolean true""" 294 lexer = SCIMPathLexer("true") 295 token = lexer.get_next_token() 296 self.assertEqual(token.type, TokenType.BOOLEAN) 297 self.assertTrue(token.value)
Test tokenizing boolean true
299 def test_get_next_token_boolean_false(self): 300 """Test tokenizing boolean false""" 301 lexer = SCIMPathLexer("false") 302 token = lexer.get_next_token() 303 self.assertEqual(token.type, TokenType.BOOLEAN) 304 self.assertFalse(token.value)
Test tokenizing boolean false
306 def test_get_next_token_boolean_case_insensitive(self): 307 """Test tokenizing boolean with different cases""" 308 for value in ["TRUE", "True", "FALSE", "False"]: 309 with self.subTest(value=value): 310 lexer = SCIMPathLexer(value) 311 token = lexer.get_next_token() 312 self.assertEqual(token.type, TokenType.BOOLEAN)
Test tokenizing boolean with different cases
314 def test_get_next_token_null(self): 315 """Test tokenizing null""" 316 lexer = SCIMPathLexer("null") 317 token = lexer.get_next_token() 318 self.assertEqual(token.type, TokenType.NULL) 319 self.assertIsNone(token.value)
Test tokenizing null
321 def test_get_next_token_null_case_insensitive(self): 322 """Test tokenizing null with different cases""" 323 for value in ["NULL", "Null"]: 324 with self.subTest(value=value): 325 lexer = SCIMPathLexer(value) 326 token = lexer.get_next_token() 327 self.assertEqual(token.type, TokenType.NULL)
Test tokenizing null with different cases
329 def test_get_next_token_and(self): 330 """Test tokenizing AND operator""" 331 lexer = SCIMPathLexer("and") 332 token = lexer.get_next_token() 333 self.assertEqual(token.type, TokenType.AND) 334 self.assertEqual(token.value, "and")
Test tokenizing AND operator
336 def test_get_next_token_or(self): 337 """Test tokenizing OR operator""" 338 lexer = SCIMPathLexer("or") 339 token = lexer.get_next_token() 340 self.assertEqual(token.type, TokenType.OR) 341 self.assertEqual(token.value, "or")
Test tokenizing OR operator
343 def test_get_next_token_not(self): 344 """Test tokenizing NOT operator""" 345 lexer = SCIMPathLexer("not") 346 token = lexer.get_next_token() 347 self.assertEqual(token.type, TokenType.NOT) 348 self.assertEqual(token.value, "not")
Test tokenizing NOT operator
350 def test_get_next_token_operators(self): 351 """Test tokenizing all comparison operators""" 352 operators = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"] 353 for op in operators: 354 with self.subTest(operator=op): 355 lexer = SCIMPathLexer(op) 356 token = lexer.get_next_token() 357 self.assertEqual(token.type, TokenType.OPERATOR) 358 self.assertEqual(token.value, op)
Test tokenizing all comparison operators
360 def test_get_next_token_operators_case_insensitive(self): 361 """Test tokenizing operators with different cases""" 362 for op in ["EQ", "Eq", "NE", "Ne"]: 363 with self.subTest(operator=op): 364 lexer = SCIMPathLexer(op) 365 token = lexer.get_next_token() 366 self.assertEqual(token.type, TokenType.OPERATOR) 367 self.assertEqual(token.value, op.lower())
Test tokenizing operators with different cases
369 def test_get_next_token_attribute(self): 370 """Test tokenizing attribute name""" 371 lexer = SCIMPathLexer("userName") 372 token = lexer.get_next_token() 373 self.assertEqual(token.type, TokenType.ATTRIBUTE) 374 self.assertEqual(token.value, "userName")
Test tokenizing attribute name
376 def test_get_next_token_attribute_with_underscore(self): 377 """Test tokenizing attribute name with underscore""" 378 lexer = SCIMPathLexer("_userName") 379 token = lexer.get_next_token() 380 self.assertEqual(token.type, TokenType.ATTRIBUTE) 381 self.assertEqual(token.value, "_userName")
Test tokenizing attribute name with underscore
383 def test_get_next_token_eof(self): 384 """Test tokenizing end of file""" 385 lexer = SCIMPathLexer("") 386 token = lexer.get_next_token() 387 self.assertEqual(token.type, TokenType.EOF) 388 self.assertEqual(token.value, "")
Test tokenizing end of file
390 def test_get_next_token_with_whitespace(self): 391 """Test tokenizing with leading whitespace""" 392 lexer = SCIMPathLexer(" userName") 393 token = lexer.get_next_token() 394 self.assertEqual(token.type, TokenType.ATTRIBUTE) 395 self.assertEqual(token.value, "userName")
Test tokenizing with leading whitespace
397 def test_get_next_token_skip_unknown_characters(self): 398 """Test that unknown characters are skipped""" 399 lexer = SCIMPathLexer("@#$userName") 400 token = lexer.get_next_token() 401 self.assertEqual(token.type, TokenType.ATTRIBUTE) 402 self.assertEqual(token.value, "userName")
Test that unknown characters are skipped
404 def test_get_next_token_multiple_tokens(self): 405 """Test tokenizing multiple tokens in sequence""" 406 lexer = SCIMPathLexer("userName.givenName") 407 408 token1 = lexer.get_next_token() 409 self.assertEqual(token1.type, TokenType.ATTRIBUTE) 410 self.assertEqual(token1.value, "userName") 411 412 token2 = lexer.get_next_token() 413 self.assertEqual(token2.type, TokenType.DOT) 414 self.assertEqual(token2.value, ".") 415 416 token3 = lexer.get_next_token() 417 self.assertEqual(token3.type, TokenType.ATTRIBUTE) 418 self.assertEqual(token3.value, "givenName") 419 420 token4 = lexer.get_next_token() 421 self.assertEqual(token4.type, TokenType.EOF)
Test tokenizing multiple tokens in sequence
423 def test_get_next_token_complex_filter(self): 424 """Test tokenizing complex filter expression""" 425 lexer = SCIMPathLexer('emails[type eq "work" and primary eq true]') 426 427 tokens = [] 428 while True: 429 token = lexer.get_next_token() 430 tokens.append(token) 431 if token.type == TokenType.EOF: 432 break 433 434 expected_types = [ 435 TokenType.ATTRIBUTE, # emails 436 TokenType.LBRACKET, # [ 437 TokenType.ATTRIBUTE, # type 438 TokenType.OPERATOR, # eq 439 TokenType.STRING, # "work" 440 TokenType.AND, # and 441 TokenType.ATTRIBUTE, # primary 442 TokenType.OPERATOR, # eq 443 TokenType.BOOLEAN, # true 444 TokenType.RBRACKET, # ] 445 TokenType.EOF, 446 ] 447 448 self.assertEqual(len(tokens), len(expected_types)) 449 for token, expected_type in zip(tokens, expected_types, strict=False): 450 self.assertEqual(token.type, expected_type)
Test tokenizing complex filter expression
452 def test_get_next_token_urn_attribute(self): 453 """Test tokenizing URN-based attribute""" 454 lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName") 455 456 token1 = lexer.get_next_token() 457 self.assertEqual(token1.type, TokenType.ATTRIBUTE) 458 self.assertEqual(token1.value, SCIM_URN_USER) 459 460 token2 = lexer.get_next_token() 461 self.assertEqual(token2.type, TokenType.DOT) 462 463 token3 = lexer.get_next_token() 464 self.assertEqual(token3.type, TokenType.ATTRIBUTE) 465 self.assertEqual(token3.value, "userName")
Test tokenizing URN-based attribute
467 def test_get_next_token_enterprise_urn(self): 468 """Test tokenizing enterprise URN""" 469 lexer = SCIMPathLexer(f"{SCIM_URN_USER_ENTERPRISE}.manager") 470 471 token1 = lexer.get_next_token() 472 self.assertEqual(token1.type, TokenType.ATTRIBUTE) 473 self.assertEqual(token1.value, SCIM_URN_USER_ENTERPRISE) 474 475 token2 = lexer.get_next_token() 476 self.assertEqual(token2.type, TokenType.DOT)
Test tokenizing enterprise URN
478 def test_lexer_state_after_eof(self): 479 """Test lexer state after reaching EOF""" 480 lexer = SCIMPathLexer("a") 481 482 # Get first token 483 token1 = lexer.get_next_token() 484 self.assertEqual(token1.type, TokenType.ATTRIBUTE) 485 486 # Get EOF token 487 token2 = lexer.get_next_token() 488 self.assertEqual(token2.type, TokenType.EOF) 489 490 # Should continue returning EOF 491 token3 = lexer.get_next_token() 492 self.assertEqual(token3.type, TokenType.EOF)
Test lexer state after reaching EOF
494 def test_read_identifier_edge_cases(self): 495 """Test read_identifier with edge cases""" 496 # Test identifier ending with colon 497 lexer = SCIMPathLexer("test:") 498 result = lexer.read_identifier() 499 self.assertEqual(result, "test:") 500 501 # Test identifier with numbers 502 lexer = SCIMPathLexer("test123") 503 result = lexer.read_identifier() 504 self.assertEqual(result, "test123")
Test read_identifier with edge cases
506 def test_complex_urn_parsing(self): 507 """Test parsing complex URN with version numbers""" 508 urn = "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User" 509 lexer = SCIMPathLexer(urn) 510 result = lexer.read_identifier() 511 self.assertEqual(result, urn)
Test parsing complex URN with version numbers