authentik.sources.scim.tests.test_lexer

  1from unittest import TestCase
  2
  3from authentik.sources.scim.constants import (
  4    SCIM_URN_GROUP,
  5    SCIM_URN_SCHEMA,
  6    SCIM_URN_USER,
  7    SCIM_URN_USER_ENTERPRISE,
  8)
  9from authentik.sources.scim.patch.lexer import SCIMPathLexer, Token, TokenType
 10
 11
 12class TestTokenType(TestCase):
 13    """Test TokenType enum"""
 14
 15    def test_token_type_values(self):
 16        """Test that all token types have correct values"""
 17        self.assertEqual(TokenType.ATTRIBUTE.value, "ATTRIBUTE")
 18        self.assertEqual(TokenType.DOT.value, "DOT")
 19        self.assertEqual(TokenType.LBRACKET.value, "LBRACKET")
 20        self.assertEqual(TokenType.RBRACKET.value, "RBRACKET")
 21        self.assertEqual(TokenType.LPAREN.value, "LPAREN")
 22        self.assertEqual(TokenType.RPAREN.value, "RPAREN")
 23        self.assertEqual(TokenType.STRING.value, "STRING")
 24        self.assertEqual(TokenType.NUMBER.value, "NUMBER")
 25        self.assertEqual(TokenType.BOOLEAN.value, "BOOLEAN")
 26        self.assertEqual(TokenType.NULL.value, "NULL")
 27        self.assertEqual(TokenType.OPERATOR.value, "OPERATOR")
 28        self.assertEqual(TokenType.AND.value, "AND")
 29        self.assertEqual(TokenType.OR.value, "OR")
 30        self.assertEqual(TokenType.NOT.value, "NOT")
 31        self.assertEqual(TokenType.EOF.value, "EOF")
 32
 33
 34class TestToken(TestCase):
 35    """Test Token dataclass"""
 36
 37    def test_token_creation(self):
 38        """Test token creation with all parameters"""
 39        token = Token(TokenType.ATTRIBUTE, "userName", 5)
 40        self.assertEqual(token.type, TokenType.ATTRIBUTE)
 41        self.assertEqual(token.value, "userName")
 42        self.assertEqual(token.position, 5)
 43
 44    def test_token_creation_default_position(self):
 45        """Test token creation with default position"""
 46        token = Token(TokenType.DOT, ".")
 47        self.assertEqual(token.type, TokenType.DOT)
 48        self.assertEqual(token.value, ".")
 49        self.assertEqual(token.position, 0)
 50
 51
 52class TestSCIMPathLexer(TestCase):
 53    """Test SCIMPathLexer class"""
 54
 55    def setUp(self):
 56        """Set up test fixtures"""
 57        self.simple_lexer = SCIMPathLexer("userName")
 58
 59    def test_init(self):
 60        """Test lexer initialization"""
 61        lexer = SCIMPathLexer("test")
 62        self.assertEqual(lexer.text, "test")
 63        self.assertEqual(lexer.pos, 0)
 64        self.assertEqual(lexer.current_char, "t")
 65        self.assertIn(SCIM_URN_SCHEMA, lexer.schema_urns)
 66        self.assertIn(SCIM_URN_GROUP, lexer.schema_urns)
 67        self.assertIn(SCIM_URN_USER, lexer.schema_urns)
 68        self.assertIn(SCIM_URN_USER_ENTERPRISE, lexer.schema_urns)
 69        self.assertEqual(
 70            lexer.OPERATORS, ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
 71        )
 72
 73    def test_init_empty_string(self):
 74        """Test lexer initialization with empty string"""
 75        lexer = SCIMPathLexer("")
 76        self.assertEqual(lexer.text, "")
 77        self.assertEqual(lexer.pos, 0)
 78        self.assertIsNone(lexer.current_char)
 79
 80    def test_advance(self):
 81        """Test advance method"""
 82        lexer = SCIMPathLexer("abc")
 83        self.assertEqual(lexer.current_char, "a")
 84
 85        lexer.advance()
 86        self.assertEqual(lexer.pos, 1)
 87        self.assertEqual(lexer.current_char, "b")
 88
 89        lexer.advance()
 90        self.assertEqual(lexer.pos, 2)
 91        self.assertEqual(lexer.current_char, "c")
 92
 93        lexer.advance()
 94        self.assertEqual(lexer.pos, 3)
 95        self.assertIsNone(lexer.current_char)
 96
 97    def test_skip_whitespace(self):
 98        """Test skip_whitespace method"""
 99        lexer = SCIMPathLexer("   \t\n  abc")
100        lexer.skip_whitespace()
101        self.assertEqual(lexer.current_char, "a")
102
103    def test_skip_whitespace_only_whitespace(self):
104        """Test skip_whitespace with only whitespace"""
105        lexer = SCIMPathLexer("   \t\n  ")
106        lexer.skip_whitespace()
107        self.assertIsNone(lexer.current_char)
108
109    def test_skip_whitespace_no_whitespace(self):
110        """Test skip_whitespace with no leading whitespace"""
111        lexer = SCIMPathLexer("abc")
112        original_pos = lexer.pos
113        lexer.skip_whitespace()
114        self.assertEqual(lexer.pos, original_pos)
115        self.assertEqual(lexer.current_char, "a")
116
117    def test_read_string_double_quotes(self):
118        """Test reading double-quoted string"""
119        lexer = SCIMPathLexer('"hello world"')
120        result = lexer.read_string('"')
121        self.assertEqual(result, "hello world")
122        self.assertIsNone(lexer.current_char)  # Should be at end
123
124    def test_read_string_single_quotes(self):
125        """Test reading single-quoted string"""
126        lexer = SCIMPathLexer("'hello world'")
127        result = lexer.read_string("'")
128        self.assertEqual(result, "hello world")
129        self.assertIsNone(lexer.current_char)
130
131    def test_read_string_with_escapes(self):
132        """Test reading string with escape characters"""
133        lexer = SCIMPathLexer('"hello \\"world\\""')
134        result = lexer.read_string('"')
135        self.assertEqual(result, 'hello "world"')
136
137    def test_read_string_with_backslash_at_end(self):
138        """Test reading string with backslash at end"""
139        lexer = SCIMPathLexer('"hello\\"')
140        result = lexer.read_string('"')
141        self.assertEqual(result, 'hello"')
142
143    def test_read_string_unclosed(self):
144        """Test reading unclosed string"""
145        lexer = SCIMPathLexer('"hello world')
146        result = lexer.read_string('"')
147        self.assertEqual(result, "hello world")
148        self.assertIsNone(lexer.current_char)
149
150    def test_read_string_empty(self):
151        """Test reading empty string"""
152        lexer = SCIMPathLexer('""')
153        result = lexer.read_string('"')
154        self.assertEqual(result, "")
155
156    def test_read_number_integer(self):
157        """Test reading integer number"""
158        lexer = SCIMPathLexer("123")
159        result = lexer.read_number()
160        self.assertEqual(result, "123")
161        self.assertIsNone(lexer.current_char)
162
163    def test_read_number_float(self):
164        """Test reading float number"""
165        lexer = SCIMPathLexer("123.456")
166        result = lexer.read_number()
167        self.assertEqual(result, "123.456")
168        self.assertIsNone(lexer.current_char)
169
170    def test_read_number_with_multiple_dots(self):
171        """Test reading number with multiple dots (invalid but handled)"""
172        lexer = SCIMPathLexer("123.456.789")
173        result = lexer.read_number()
174        self.assertEqual(result, "123.456.789")
175        self.assertIsNone(lexer.current_char)
176
177    def test_read_number_starting_with_dot(self):
178        """Test reading number starting with dot"""
179        lexer = SCIMPathLexer(".123")
180        result = lexer.read_number()
181        self.assertEqual(result, ".123")
182
183    def test_read_identifier_simple(self):
184        """Test reading simple identifier"""
185        lexer = SCIMPathLexer("userName")
186        result = lexer.read_identifier()
187        self.assertEqual(result, "userName")
188        self.assertIsNone(lexer.current_char)
189
190    def test_read_identifier_with_underscore(self):
191        """Test reading identifier with underscore"""
192        lexer = SCIMPathLexer("user_name")
193        result = lexer.read_identifier()
194        self.assertEqual(result, "user_name")
195
196    def test_read_identifier_with_hyphen(self):
197        """Test reading identifier with hyphen"""
198        lexer = SCIMPathLexer("user-name")
199        result = lexer.read_identifier()
200        self.assertEqual(result, "user-name")
201
202    def test_read_identifier_with_colon(self):
203        """Test reading identifier with colon (URN format)"""
204        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
205        result = lexer.read_identifier()
206        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")
207
208    def test_read_identifier_schema_urn(self):
209        """Test reading schema URN identifier"""
210        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
211        result = lexer.read_identifier()
212        self.assertEqual(result, SCIM_URN_USER)
213        self.assertEqual(lexer.current_char, ".")  # Should stop at dot and set current_char to dot
214
215    def test_read_identifier_with_version_number(self):
216        """Test reading identifier with version number (dots followed by digits)"""
217        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
218        result = lexer.read_identifier()
219        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")
220
221    def test_read_identifier_partial_urn_match(self):
222        """Test reading identifier that partially matches URN"""
223        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:CustomUser")
224        result = lexer.read_identifier()
225        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:CustomUser")
226
227    # Test get_next_token method
228    def test_get_next_token_dot(self):
229        """Test tokenizing dot"""
230        lexer = SCIMPathLexer(".")
231        token = lexer.get_next_token()
232        self.assertEqual(token.type, TokenType.DOT)
233        self.assertEqual(token.value, ".")
234
235    def test_get_next_token_lbracket(self):
236        """Test tokenizing left bracket"""
237        lexer = SCIMPathLexer("[")
238        token = lexer.get_next_token()
239        self.assertEqual(token.type, TokenType.LBRACKET)
240        self.assertEqual(token.value, "[")
241
242    def test_get_next_token_rbracket(self):
243        """Test tokenizing right bracket"""
244        lexer = SCIMPathLexer("]")
245        token = lexer.get_next_token()
246        self.assertEqual(token.type, TokenType.RBRACKET)
247        self.assertEqual(token.value, "]")
248
249    def test_get_next_token_lparen(self):
250        """Test tokenizing left parenthesis"""
251        lexer = SCIMPathLexer("(")
252        token = lexer.get_next_token()
253        self.assertEqual(token.type, TokenType.LPAREN)
254        self.assertEqual(token.value, "(")
255
256    def test_get_next_token_rparen(self):
257        """Test tokenizing right parenthesis"""
258        lexer = SCIMPathLexer(")")
259        token = lexer.get_next_token()
260        self.assertEqual(token.type, TokenType.RPAREN)
261        self.assertEqual(token.value, ")")
262
263    def test_get_next_token_string_double_quotes(self):
264        """Test tokenizing double-quoted string"""
265        lexer = SCIMPathLexer('"test string"')
266        token = lexer.get_next_token()
267        self.assertEqual(token.type, TokenType.STRING)
268        self.assertEqual(token.value, "test string")
269
270    def test_get_next_token_string_single_quotes(self):
271        """Test tokenizing single-quoted string"""
272        lexer = SCIMPathLexer("'test string'")
273        token = lexer.get_next_token()
274        self.assertEqual(token.type, TokenType.STRING)
275        self.assertEqual(token.value, "test string")
276
277    def test_get_next_token_number_integer(self):
278        """Test tokenizing integer"""
279        lexer = SCIMPathLexer("123")
280        token = lexer.get_next_token()
281        self.assertEqual(token.type, TokenType.NUMBER)
282        self.assertEqual(token.value, "123")
283
284    def test_get_next_token_number_float(self):
285        """Test tokenizing float"""
286        lexer = SCIMPathLexer("123.45")
287        token = lexer.get_next_token()
288        self.assertEqual(token.type, TokenType.NUMBER)
289        self.assertEqual(token.value, "123.45")
290
291    def test_get_next_token_boolean_true(self):
292        """Test tokenizing boolean true"""
293        lexer = SCIMPathLexer("true")
294        token = lexer.get_next_token()
295        self.assertEqual(token.type, TokenType.BOOLEAN)
296        self.assertTrue(token.value)
297
298    def test_get_next_token_boolean_false(self):
299        """Test tokenizing boolean false"""
300        lexer = SCIMPathLexer("false")
301        token = lexer.get_next_token()
302        self.assertEqual(token.type, TokenType.BOOLEAN)
303        self.assertFalse(token.value)
304
305    def test_get_next_token_boolean_case_insensitive(self):
306        """Test tokenizing boolean with different cases"""
307        for value in ["TRUE", "True", "FALSE", "False"]:
308            with self.subTest(value=value):
309                lexer = SCIMPathLexer(value)
310                token = lexer.get_next_token()
311                self.assertEqual(token.type, TokenType.BOOLEAN)
312
313    def test_get_next_token_null(self):
314        """Test tokenizing null"""
315        lexer = SCIMPathLexer("null")
316        token = lexer.get_next_token()
317        self.assertEqual(token.type, TokenType.NULL)
318        self.assertIsNone(token.value)
319
320    def test_get_next_token_null_case_insensitive(self):
321        """Test tokenizing null with different cases"""
322        for value in ["NULL", "Null"]:
323            with self.subTest(value=value):
324                lexer = SCIMPathLexer(value)
325                token = lexer.get_next_token()
326                self.assertEqual(token.type, TokenType.NULL)
327
328    def test_get_next_token_and(self):
329        """Test tokenizing AND operator"""
330        lexer = SCIMPathLexer("and")
331        token = lexer.get_next_token()
332        self.assertEqual(token.type, TokenType.AND)
333        self.assertEqual(token.value, "and")
334
335    def test_get_next_token_or(self):
336        """Test tokenizing OR operator"""
337        lexer = SCIMPathLexer("or")
338        token = lexer.get_next_token()
339        self.assertEqual(token.type, TokenType.OR)
340        self.assertEqual(token.value, "or")
341
342    def test_get_next_token_not(self):
343        """Test tokenizing NOT operator"""
344        lexer = SCIMPathLexer("not")
345        token = lexer.get_next_token()
346        self.assertEqual(token.type, TokenType.NOT)
347        self.assertEqual(token.value, "not")
348
349    def test_get_next_token_operators(self):
350        """Test tokenizing all comparison operators"""
351        operators = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
352        for op in operators:
353            with self.subTest(operator=op):
354                lexer = SCIMPathLexer(op)
355                token = lexer.get_next_token()
356                self.assertEqual(token.type, TokenType.OPERATOR)
357                self.assertEqual(token.value, op)
358
359    def test_get_next_token_operators_case_insensitive(self):
360        """Test tokenizing operators with different cases"""
361        for op in ["EQ", "Eq", "NE", "Ne"]:
362            with self.subTest(operator=op):
363                lexer = SCIMPathLexer(op)
364                token = lexer.get_next_token()
365                self.assertEqual(token.type, TokenType.OPERATOR)
366                self.assertEqual(token.value, op.lower())
367
368    def test_get_next_token_attribute(self):
369        """Test tokenizing attribute name"""
370        lexer = SCIMPathLexer("userName")
371        token = lexer.get_next_token()
372        self.assertEqual(token.type, TokenType.ATTRIBUTE)
373        self.assertEqual(token.value, "userName")
374
375    def test_get_next_token_attribute_with_underscore(self):
376        """Test tokenizing attribute name with underscore"""
377        lexer = SCIMPathLexer("_userName")
378        token = lexer.get_next_token()
379        self.assertEqual(token.type, TokenType.ATTRIBUTE)
380        self.assertEqual(token.value, "_userName")
381
382    def test_get_next_token_eof(self):
383        """Test tokenizing end of file"""
384        lexer = SCIMPathLexer("")
385        token = lexer.get_next_token()
386        self.assertEqual(token.type, TokenType.EOF)
387        self.assertEqual(token.value, "")
388
389    def test_get_next_token_with_whitespace(self):
390        """Test tokenizing with leading whitespace"""
391        lexer = SCIMPathLexer("   userName")
392        token = lexer.get_next_token()
393        self.assertEqual(token.type, TokenType.ATTRIBUTE)
394        self.assertEqual(token.value, "userName")
395
396    def test_get_next_token_skip_unknown_characters(self):
397        """Test that unknown characters are skipped"""
398        lexer = SCIMPathLexer("@#$userName")
399        token = lexer.get_next_token()
400        self.assertEqual(token.type, TokenType.ATTRIBUTE)
401        self.assertEqual(token.value, "userName")
402
403    def test_get_next_token_multiple_tokens(self):
404        """Test tokenizing multiple tokens in sequence"""
405        lexer = SCIMPathLexer("userName.givenName")
406
407        token1 = lexer.get_next_token()
408        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
409        self.assertEqual(token1.value, "userName")
410
411        token2 = lexer.get_next_token()
412        self.assertEqual(token2.type, TokenType.DOT)
413        self.assertEqual(token2.value, ".")
414
415        token3 = lexer.get_next_token()
416        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
417        self.assertEqual(token3.value, "givenName")
418
419        token4 = lexer.get_next_token()
420        self.assertEqual(token4.type, TokenType.EOF)
421
422    def test_get_next_token_complex_filter(self):
423        """Test tokenizing complex filter expression"""
424        lexer = SCIMPathLexer('emails[type eq "work" and primary eq true]')
425
426        tokens = []
427        while True:
428            token = lexer.get_next_token()
429            tokens.append(token)
430            if token.type == TokenType.EOF:
431                break
432
433        expected_types = [
434            TokenType.ATTRIBUTE,  # emails
435            TokenType.LBRACKET,  # [
436            TokenType.ATTRIBUTE,  # type
437            TokenType.OPERATOR,  # eq
438            TokenType.STRING,  # "work"
439            TokenType.AND,  # and
440            TokenType.ATTRIBUTE,  # primary
441            TokenType.OPERATOR,  # eq
442            TokenType.BOOLEAN,  # true
443            TokenType.RBRACKET,  # ]
444            TokenType.EOF,
445        ]
446
447        self.assertEqual(len(tokens), len(expected_types))
448        for token, expected_type in zip(tokens, expected_types, strict=False):
449            self.assertEqual(token.type, expected_type)
450
451    def test_get_next_token_urn_attribute(self):
452        """Test tokenizing URN-based attribute"""
453        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
454
455        token1 = lexer.get_next_token()
456        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
457        self.assertEqual(token1.value, SCIM_URN_USER)
458
459        token2 = lexer.get_next_token()
460        self.assertEqual(token2.type, TokenType.DOT)
461
462        token3 = lexer.get_next_token()
463        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
464        self.assertEqual(token3.value, "userName")
465
466    def test_get_next_token_enterprise_urn(self):
467        """Test tokenizing enterprise URN"""
468        lexer = SCIMPathLexer(f"{SCIM_URN_USER_ENTERPRISE}.manager")
469
470        token1 = lexer.get_next_token()
471        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
472        self.assertEqual(token1.value, SCIM_URN_USER_ENTERPRISE)
473
474        token2 = lexer.get_next_token()
475        self.assertEqual(token2.type, TokenType.DOT)
476
477    def test_lexer_state_after_eof(self):
478        """Test lexer state after reaching EOF"""
479        lexer = SCIMPathLexer("a")
480
481        # Get first token
482        token1 = lexer.get_next_token()
483        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
484
485        # Get EOF token
486        token2 = lexer.get_next_token()
487        self.assertEqual(token2.type, TokenType.EOF)
488
489        # Should continue returning EOF
490        token3 = lexer.get_next_token()
491        self.assertEqual(token3.type, TokenType.EOF)
492
493    def test_read_identifier_edge_cases(self):
494        """Test read_identifier with edge cases"""
495        # Test identifier ending with colon
496        lexer = SCIMPathLexer("test:")
497        result = lexer.read_identifier()
498        self.assertEqual(result, "test:")
499
500        # Test identifier with numbers
501        lexer = SCIMPathLexer("test123")
502        result = lexer.read_identifier()
503        self.assertEqual(result, "test123")
504
505    def test_complex_urn_parsing(self):
506        """Test parsing complex URN with version numbers"""
507        urn = "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
508        lexer = SCIMPathLexer(urn)
509        result = lexer.read_identifier()
510        self.assertEqual(result, urn)
class TestTokenType(unittest.case.TestCase):
class TestTokenType(TestCase):
    """Test TokenType enum"""

    def test_token_type_values(self):
        """Check that each expected TokenType member's value equals its name.

        Each member is verified inside its own ``subTest`` so that a single
        run reports every mismatching member instead of stopping at the
        first failing assertion.
        """
        expected_names = (
            "ATTRIBUTE",
            "DOT",
            "LBRACKET",
            "RBRACKET",
            "LPAREN",
            "RPAREN",
            "STRING",
            "NUMBER",
            "BOOLEAN",
            "NULL",
            "OPERATOR",
            "AND",
            "OR",
            "NOT",
            "EOF",
        )
        for name in expected_names:
            with self.subTest(token_type=name):
                # Attribute access mirrors ``TokenType.<NAME>.value``.
                self.assertEqual(getattr(TokenType, name).value, name)

Test TokenType enum

def test_token_type_values(self):
16    def test_token_type_values(self):
17        """Test that all token types have correct values"""
18        self.assertEqual(TokenType.ATTRIBUTE.value, "ATTRIBUTE")
19        self.assertEqual(TokenType.DOT.value, "DOT")
20        self.assertEqual(TokenType.LBRACKET.value, "LBRACKET")
21        self.assertEqual(TokenType.RBRACKET.value, "RBRACKET")
22        self.assertEqual(TokenType.LPAREN.value, "LPAREN")
23        self.assertEqual(TokenType.RPAREN.value, "RPAREN")
24        self.assertEqual(TokenType.STRING.value, "STRING")
25        self.assertEqual(TokenType.NUMBER.value, "NUMBER")
26        self.assertEqual(TokenType.BOOLEAN.value, "BOOLEAN")
27        self.assertEqual(TokenType.NULL.value, "NULL")
28        self.assertEqual(TokenType.OPERATOR.value, "OPERATOR")
29        self.assertEqual(TokenType.AND.value, "AND")
30        self.assertEqual(TokenType.OR.value, "OR")
31        self.assertEqual(TokenType.NOT.value, "NOT")
32        self.assertEqual(TokenType.EOF.value, "EOF")

Test that all token types have correct values

class TestToken(unittest.case.TestCase):
class TestToken(TestCase):
    """Exercise construction of ``Token`` objects."""

    def test_token_creation(self):
        """All three positional arguments are exposed as type, value, position."""
        tok = Token(TokenType.ATTRIBUTE, "userName", 5)
        observed = (tok.type, tok.value, tok.position)
        self.assertEqual(observed, (TokenType.ATTRIBUTE, "userName", 5))

    def test_token_creation_default_position(self):
        """Omitting the position argument leaves ``position`` at 0."""
        tok = Token(TokenType.DOT, ".")
        observed = (tok.type, tok.value, tok.position)
        self.assertEqual(observed, (TokenType.DOT, ".", 0))

Test Token dataclass

def test_token_creation(self):
38    def test_token_creation(self):
39        """Test token creation with all parameters"""
40        token = Token(TokenType.ATTRIBUTE, "userName", 5)
41        self.assertEqual(token.type, TokenType.ATTRIBUTE)
42        self.assertEqual(token.value, "userName")
43        self.assertEqual(token.position, 5)

Test token creation with all parameters

def test_token_creation_default_position(self):
45    def test_token_creation_default_position(self):
46        """Test token creation with default position"""
47        token = Token(TokenType.DOT, ".")
48        self.assertEqual(token.type, TokenType.DOT)
49        self.assertEqual(token.value, ".")
50        self.assertEqual(token.position, 0)

Test token creation with default position

class TestSCIMPathLexer(unittest.case.TestCase):
 53class TestSCIMPathLexer(TestCase):
 54    """Test SCIMPathLexer class"""
 55
 56    def setUp(self):
 57        """Set up test fixtures"""
 58        self.simple_lexer = SCIMPathLexer("userName")
 59
 60    def test_init(self):
 61        """Test lexer initialization"""
 62        lexer = SCIMPathLexer("test")
 63        self.assertEqual(lexer.text, "test")
 64        self.assertEqual(lexer.pos, 0)
 65        self.assertEqual(lexer.current_char, "t")
 66        self.assertIn(SCIM_URN_SCHEMA, lexer.schema_urns)
 67        self.assertIn(SCIM_URN_GROUP, lexer.schema_urns)
 68        self.assertIn(SCIM_URN_USER, lexer.schema_urns)
 69        self.assertIn(SCIM_URN_USER_ENTERPRISE, lexer.schema_urns)
 70        self.assertEqual(
 71            lexer.OPERATORS, ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
 72        )
 73
 74    def test_init_empty_string(self):
 75        """Test lexer initialization with empty string"""
 76        lexer = SCIMPathLexer("")
 77        self.assertEqual(lexer.text, "")
 78        self.assertEqual(lexer.pos, 0)
 79        self.assertIsNone(lexer.current_char)
 80
 81    def test_advance(self):
 82        """Test advance method"""
 83        lexer = SCIMPathLexer("abc")
 84        self.assertEqual(lexer.current_char, "a")
 85
 86        lexer.advance()
 87        self.assertEqual(lexer.pos, 1)
 88        self.assertEqual(lexer.current_char, "b")
 89
 90        lexer.advance()
 91        self.assertEqual(lexer.pos, 2)
 92        self.assertEqual(lexer.current_char, "c")
 93
 94        lexer.advance()
 95        self.assertEqual(lexer.pos, 3)
 96        self.assertIsNone(lexer.current_char)
 97
 98    def test_skip_whitespace(self):
 99        """Test skip_whitespace method"""
100        lexer = SCIMPathLexer("   \t\n  abc")
101        lexer.skip_whitespace()
102        self.assertEqual(lexer.current_char, "a")
103
104    def test_skip_whitespace_only_whitespace(self):
105        """Test skip_whitespace with only whitespace"""
106        lexer = SCIMPathLexer("   \t\n  ")
107        lexer.skip_whitespace()
108        self.assertIsNone(lexer.current_char)
109
110    def test_skip_whitespace_no_whitespace(self):
111        """Test skip_whitespace with no leading whitespace"""
112        lexer = SCIMPathLexer("abc")
113        original_pos = lexer.pos
114        lexer.skip_whitespace()
115        self.assertEqual(lexer.pos, original_pos)
116        self.assertEqual(lexer.current_char, "a")
117
118    def test_read_string_double_quotes(self):
119        """Test reading double-quoted string"""
120        lexer = SCIMPathLexer('"hello world"')
121        result = lexer.read_string('"')
122        self.assertEqual(result, "hello world")
123        self.assertIsNone(lexer.current_char)  # Should be at end
124
125    def test_read_string_single_quotes(self):
126        """Test reading single-quoted string"""
127        lexer = SCIMPathLexer("'hello world'")
128        result = lexer.read_string("'")
129        self.assertEqual(result, "hello world")
130        self.assertIsNone(lexer.current_char)
131
132    def test_read_string_with_escapes(self):
133        """Test reading string with escape characters"""
134        lexer = SCIMPathLexer('"hello \\"world\\""')
135        result = lexer.read_string('"')
136        self.assertEqual(result, 'hello "world"')
137
138    def test_read_string_with_backslash_at_end(self):
139        """Test reading string with backslash at end"""
140        lexer = SCIMPathLexer('"hello\\"')
141        result = lexer.read_string('"')
142        self.assertEqual(result, 'hello"')
143
144    def test_read_string_unclosed(self):
145        """Test reading unclosed string"""
146        lexer = SCIMPathLexer('"hello world')
147        result = lexer.read_string('"')
148        self.assertEqual(result, "hello world")
149        self.assertIsNone(lexer.current_char)
150
151    def test_read_string_empty(self):
152        """Test reading empty string"""
153        lexer = SCIMPathLexer('""')
154        result = lexer.read_string('"')
155        self.assertEqual(result, "")
156
157    def test_read_number_integer(self):
158        """Test reading integer number"""
159        lexer = SCIMPathLexer("123")
160        result = lexer.read_number()
161        self.assertEqual(result, "123")
162        self.assertIsNone(lexer.current_char)
163
164    def test_read_number_float(self):
165        """Test reading float number"""
166        lexer = SCIMPathLexer("123.456")
167        result = lexer.read_number()
168        self.assertEqual(result, "123.456")
169        self.assertIsNone(lexer.current_char)
170
171    def test_read_number_with_multiple_dots(self):
172        """Test reading number with multiple dots (invalid but handled)"""
173        lexer = SCIMPathLexer("123.456.789")
174        result = lexer.read_number()
175        self.assertEqual(result, "123.456.789")
176        self.assertIsNone(lexer.current_char)
177
178    def test_read_number_starting_with_dot(self):
179        """Test reading number starting with dot"""
180        lexer = SCIMPathLexer(".123")
181        result = lexer.read_number()
182        self.assertEqual(result, ".123")
183
184    def test_read_identifier_simple(self):
185        """Test reading simple identifier"""
186        lexer = SCIMPathLexer("userName")
187        result = lexer.read_identifier()
188        self.assertEqual(result, "userName")
189        self.assertIsNone(lexer.current_char)
190
191    def test_read_identifier_with_underscore(self):
192        """Test reading identifier with underscore"""
193        lexer = SCIMPathLexer("user_name")
194        result = lexer.read_identifier()
195        self.assertEqual(result, "user_name")
196
197    def test_read_identifier_with_hyphen(self):
198        """Test reading identifier with hyphen"""
199        lexer = SCIMPathLexer("user-name")
200        result = lexer.read_identifier()
201        self.assertEqual(result, "user-name")
202
203    def test_read_identifier_with_colon(self):
204        """Test reading identifier with colon (URN format)"""
205        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
206        result = lexer.read_identifier()
207        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")
208
209    def test_read_identifier_schema_urn(self):
210        """Test reading schema URN identifier"""
211        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
212        result = lexer.read_identifier()
213        self.assertEqual(result, SCIM_URN_USER)
214        self.assertEqual(lexer.current_char, ".")  # Should stop at dot and set current_char to dot
215
216    def test_read_identifier_with_version_number(self):
217        """Test reading identifier with version number (dots followed by digits)"""
218        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
219        result = lexer.read_identifier()
220        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")
221
222    def test_read_identifier_partial_urn_match(self):
223        """Test reading identifier that partially matches URN"""
224        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:CustomUser")
225        result = lexer.read_identifier()
226        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:CustomUser")
227
228    # Test get_next_token method
229    def test_get_next_token_dot(self):
230        """Test tokenizing dot"""
231        lexer = SCIMPathLexer(".")
232        token = lexer.get_next_token()
233        self.assertEqual(token.type, TokenType.DOT)
234        self.assertEqual(token.value, ".")
235
236    def test_get_next_token_lbracket(self):
237        """Test tokenizing left bracket"""
238        lexer = SCIMPathLexer("[")
239        token = lexer.get_next_token()
240        self.assertEqual(token.type, TokenType.LBRACKET)
241        self.assertEqual(token.value, "[")
242
243    def test_get_next_token_rbracket(self):
244        """Test tokenizing right bracket"""
245        lexer = SCIMPathLexer("]")
246        token = lexer.get_next_token()
247        self.assertEqual(token.type, TokenType.RBRACKET)
248        self.assertEqual(token.value, "]")
249
250    def test_get_next_token_lparen(self):
251        """Test tokenizing left parenthesis"""
252        lexer = SCIMPathLexer("(")
253        token = lexer.get_next_token()
254        self.assertEqual(token.type, TokenType.LPAREN)
255        self.assertEqual(token.value, "(")
256
257    def test_get_next_token_rparen(self):
258        """Test tokenizing right parenthesis"""
259        lexer = SCIMPathLexer(")")
260        token = lexer.get_next_token()
261        self.assertEqual(token.type, TokenType.RPAREN)
262        self.assertEqual(token.value, ")")
263
264    def test_get_next_token_string_double_quotes(self):
265        """Test tokenizing double-quoted string"""
266        lexer = SCIMPathLexer('"test string"')
267        token = lexer.get_next_token()
268        self.assertEqual(token.type, TokenType.STRING)
269        self.assertEqual(token.value, "test string")
270
271    def test_get_next_token_string_single_quotes(self):
272        """Test tokenizing single-quoted string"""
273        lexer = SCIMPathLexer("'test string'")
274        token = lexer.get_next_token()
275        self.assertEqual(token.type, TokenType.STRING)
276        self.assertEqual(token.value, "test string")
277
278    def test_get_next_token_number_integer(self):
279        """Test tokenizing integer"""
280        lexer = SCIMPathLexer("123")
281        token = lexer.get_next_token()
282        self.assertEqual(token.type, TokenType.NUMBER)
283        self.assertEqual(token.value, "123")
284
285    def test_get_next_token_number_float(self):
286        """Test tokenizing float"""
287        lexer = SCIMPathLexer("123.45")
288        token = lexer.get_next_token()
289        self.assertEqual(token.type, TokenType.NUMBER)
290        self.assertEqual(token.value, "123.45")
291
292    def test_get_next_token_boolean_true(self):
293        """Test tokenizing boolean true"""
294        lexer = SCIMPathLexer("true")
295        token = lexer.get_next_token()
296        self.assertEqual(token.type, TokenType.BOOLEAN)
297        self.assertTrue(token.value)
298
299    def test_get_next_token_boolean_false(self):
300        """Test tokenizing boolean false"""
301        lexer = SCIMPathLexer("false")
302        token = lexer.get_next_token()
303        self.assertEqual(token.type, TokenType.BOOLEAN)
304        self.assertFalse(token.value)
305
306    def test_get_next_token_boolean_case_insensitive(self):
307        """Test tokenizing boolean with different cases"""
308        for value in ["TRUE", "True", "FALSE", "False"]:
309            with self.subTest(value=value):
310                lexer = SCIMPathLexer(value)
311                token = lexer.get_next_token()
312                self.assertEqual(token.type, TokenType.BOOLEAN)
313
314    def test_get_next_token_null(self):
315        """Test tokenizing null"""
316        lexer = SCIMPathLexer("null")
317        token = lexer.get_next_token()
318        self.assertEqual(token.type, TokenType.NULL)
319        self.assertIsNone(token.value)
320
321    def test_get_next_token_null_case_insensitive(self):
322        """Test tokenizing null with different cases"""
323        for value in ["NULL", "Null"]:
324            with self.subTest(value=value):
325                lexer = SCIMPathLexer(value)
326                token = lexer.get_next_token()
327                self.assertEqual(token.type, TokenType.NULL)
328
329    def test_get_next_token_and(self):
330        """Test tokenizing AND operator"""
331        lexer = SCIMPathLexer("and")
332        token = lexer.get_next_token()
333        self.assertEqual(token.type, TokenType.AND)
334        self.assertEqual(token.value, "and")
335
336    def test_get_next_token_or(self):
337        """Test tokenizing OR operator"""
338        lexer = SCIMPathLexer("or")
339        token = lexer.get_next_token()
340        self.assertEqual(token.type, TokenType.OR)
341        self.assertEqual(token.value, "or")
342
343    def test_get_next_token_not(self):
344        """Test tokenizing NOT operator"""
345        lexer = SCIMPathLexer("not")
346        token = lexer.get_next_token()
347        self.assertEqual(token.type, TokenType.NOT)
348        self.assertEqual(token.value, "not")
349
350    def test_get_next_token_operators(self):
351        """Test tokenizing all comparison operators"""
352        operators = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
353        for op in operators:
354            with self.subTest(operator=op):
355                lexer = SCIMPathLexer(op)
356                token = lexer.get_next_token()
357                self.assertEqual(token.type, TokenType.OPERATOR)
358                self.assertEqual(token.value, op)
359
360    def test_get_next_token_operators_case_insensitive(self):
361        """Test tokenizing operators with different cases"""
362        for op in ["EQ", "Eq", "NE", "Ne"]:
363            with self.subTest(operator=op):
364                lexer = SCIMPathLexer(op)
365                token = lexer.get_next_token()
366                self.assertEqual(token.type, TokenType.OPERATOR)
367                self.assertEqual(token.value, op.lower())
368
369    def test_get_next_token_attribute(self):
370        """Test tokenizing attribute name"""
371        lexer = SCIMPathLexer("userName")
372        token = lexer.get_next_token()
373        self.assertEqual(token.type, TokenType.ATTRIBUTE)
374        self.assertEqual(token.value, "userName")
375
376    def test_get_next_token_attribute_with_underscore(self):
377        """Test tokenizing attribute name with underscore"""
378        lexer = SCIMPathLexer("_userName")
379        token = lexer.get_next_token()
380        self.assertEqual(token.type, TokenType.ATTRIBUTE)
381        self.assertEqual(token.value, "_userName")
382
383    def test_get_next_token_eof(self):
384        """Test tokenizing end of file"""
385        lexer = SCIMPathLexer("")
386        token = lexer.get_next_token()
387        self.assertEqual(token.type, TokenType.EOF)
388        self.assertEqual(token.value, "")
389
390    def test_get_next_token_with_whitespace(self):
391        """Test tokenizing with leading whitespace"""
392        lexer = SCIMPathLexer("   userName")
393        token = lexer.get_next_token()
394        self.assertEqual(token.type, TokenType.ATTRIBUTE)
395        self.assertEqual(token.value, "userName")
396
397    def test_get_next_token_skip_unknown_characters(self):
398        """Test that unknown characters are skipped"""
399        lexer = SCIMPathLexer("@#$userName")
400        token = lexer.get_next_token()
401        self.assertEqual(token.type, TokenType.ATTRIBUTE)
402        self.assertEqual(token.value, "userName")
403
404    def test_get_next_token_multiple_tokens(self):
405        """Test tokenizing multiple tokens in sequence"""
406        lexer = SCIMPathLexer("userName.givenName")
407
408        token1 = lexer.get_next_token()
409        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
410        self.assertEqual(token1.value, "userName")
411
412        token2 = lexer.get_next_token()
413        self.assertEqual(token2.type, TokenType.DOT)
414        self.assertEqual(token2.value, ".")
415
416        token3 = lexer.get_next_token()
417        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
418        self.assertEqual(token3.value, "givenName")
419
420        token4 = lexer.get_next_token()
421        self.assertEqual(token4.type, TokenType.EOF)
422
423    def test_get_next_token_complex_filter(self):
424        """Test tokenizing complex filter expression"""
425        lexer = SCIMPathLexer('emails[type eq "work" and primary eq true]')
426
427        tokens = []
428        while True:
429            token = lexer.get_next_token()
430            tokens.append(token)
431            if token.type == TokenType.EOF:
432                break
433
434        expected_types = [
435            TokenType.ATTRIBUTE,  # emails
436            TokenType.LBRACKET,  # [
437            TokenType.ATTRIBUTE,  # type
438            TokenType.OPERATOR,  # eq
439            TokenType.STRING,  # "work"
440            TokenType.AND,  # and
441            TokenType.ATTRIBUTE,  # primary
442            TokenType.OPERATOR,  # eq
443            TokenType.BOOLEAN,  # true
444            TokenType.RBRACKET,  # ]
445            TokenType.EOF,
446        ]
447
448        self.assertEqual(len(tokens), len(expected_types))
449        for token, expected_type in zip(tokens, expected_types, strict=False):
450            self.assertEqual(token.type, expected_type)
451
452    def test_get_next_token_urn_attribute(self):
453        """Test tokenizing URN-based attribute"""
454        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
455
456        token1 = lexer.get_next_token()
457        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
458        self.assertEqual(token1.value, SCIM_URN_USER)
459
460        token2 = lexer.get_next_token()
461        self.assertEqual(token2.type, TokenType.DOT)
462
463        token3 = lexer.get_next_token()
464        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
465        self.assertEqual(token3.value, "userName")
466
467    def test_get_next_token_enterprise_urn(self):
468        """Test tokenizing enterprise URN"""
469        lexer = SCIMPathLexer(f"{SCIM_URN_USER_ENTERPRISE}.manager")
470
471        token1 = lexer.get_next_token()
472        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
473        self.assertEqual(token1.value, SCIM_URN_USER_ENTERPRISE)
474
475        token2 = lexer.get_next_token()
476        self.assertEqual(token2.type, TokenType.DOT)
477
478    def test_lexer_state_after_eof(self):
479        """Test lexer state after reaching EOF"""
480        lexer = SCIMPathLexer("a")
481
482        # Get first token
483        token1 = lexer.get_next_token()
484        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
485
486        # Get EOF token
487        token2 = lexer.get_next_token()
488        self.assertEqual(token2.type, TokenType.EOF)
489
490        # Should continue returning EOF
491        token3 = lexer.get_next_token()
492        self.assertEqual(token3.type, TokenType.EOF)
493
494    def test_read_identifier_edge_cases(self):
495        """Test read_identifier with edge cases"""
496        # Test identifier ending with colon
497        lexer = SCIMPathLexer("test:")
498        result = lexer.read_identifier()
499        self.assertEqual(result, "test:")
500
501        # Test identifier with numbers
502        lexer = SCIMPathLexer("test123")
503        result = lexer.read_identifier()
504        self.assertEqual(result, "test123")
505
506    def test_complex_urn_parsing(self):
507        """Test parsing complex URN with version numbers"""
508        urn = "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
509        lexer = SCIMPathLexer(urn)
510        result = lexer.read_identifier()
511        self.assertEqual(result, urn)

Test SCIMPathLexer class

def setUp(self):
56    def setUp(self):
57        """Set up test fixtures"""
58        self.simple_lexer = SCIMPathLexer("userName")

Set up test fixtures

def test_init(self):
60    def test_init(self):
61        """Test lexer initialization"""
62        lexer = SCIMPathLexer("test")
63        self.assertEqual(lexer.text, "test")
64        self.assertEqual(lexer.pos, 0)
65        self.assertEqual(lexer.current_char, "t")
66        self.assertIn(SCIM_URN_SCHEMA, lexer.schema_urns)
67        self.assertIn(SCIM_URN_GROUP, lexer.schema_urns)
68        self.assertIn(SCIM_URN_USER, lexer.schema_urns)
69        self.assertIn(SCIM_URN_USER_ENTERPRISE, lexer.schema_urns)
70        self.assertEqual(
71            lexer.OPERATORS, ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
72        )

Test lexer initialization

def test_init_empty_string(self):
74    def test_init_empty_string(self):
75        """Test lexer initialization with empty string"""
76        lexer = SCIMPathLexer("")
77        self.assertEqual(lexer.text, "")
78        self.assertEqual(lexer.pos, 0)
79        self.assertIsNone(lexer.current_char)

Test lexer initialization with empty string

def test_advance(self):
81    def test_advance(self):
82        """Test advance method"""
83        lexer = SCIMPathLexer("abc")
84        self.assertEqual(lexer.current_char, "a")
85
86        lexer.advance()
87        self.assertEqual(lexer.pos, 1)
88        self.assertEqual(lexer.current_char, "b")
89
90        lexer.advance()
91        self.assertEqual(lexer.pos, 2)
92        self.assertEqual(lexer.current_char, "c")
93
94        lexer.advance()
95        self.assertEqual(lexer.pos, 3)
96        self.assertIsNone(lexer.current_char)

Test advance method

def test_skip_whitespace(self):
 98    def test_skip_whitespace(self):
 99        """Test skip_whitespace method"""
100        lexer = SCIMPathLexer("   \t\n  abc")
101        lexer.skip_whitespace()
102        self.assertEqual(lexer.current_char, "a")

Test skip_whitespace method

def test_skip_whitespace_only_whitespace(self):
104    def test_skip_whitespace_only_whitespace(self):
105        """Test skip_whitespace with only whitespace"""
106        lexer = SCIMPathLexer("   \t\n  ")
107        lexer.skip_whitespace()
108        self.assertIsNone(lexer.current_char)

Test skip_whitespace with only whitespace

def test_skip_whitespace_no_whitespace(self):
110    def test_skip_whitespace_no_whitespace(self):
111        """Test skip_whitespace with no leading whitespace"""
112        lexer = SCIMPathLexer("abc")
113        original_pos = lexer.pos
114        lexer.skip_whitespace()
115        self.assertEqual(lexer.pos, original_pos)
116        self.assertEqual(lexer.current_char, "a")

Test skip_whitespace with no leading whitespace

def test_read_string_double_quotes(self):
118    def test_read_string_double_quotes(self):
119        """Test reading double-quoted string"""
120        lexer = SCIMPathLexer('"hello world"')
121        result = lexer.read_string('"')
122        self.assertEqual(result, "hello world")
123        self.assertIsNone(lexer.current_char)  # Should be at end

Test reading double-quoted string

def test_read_string_single_quotes(self):
125    def test_read_string_single_quotes(self):
126        """Test reading single-quoted string"""
127        lexer = SCIMPathLexer("'hello world'")
128        result = lexer.read_string("'")
129        self.assertEqual(result, "hello world")
130        self.assertIsNone(lexer.current_char)

Test reading single-quoted string

def test_read_string_with_escapes(self):
132    def test_read_string_with_escapes(self):
133        """Test reading string with escape characters"""
134        lexer = SCIMPathLexer('"hello \\"world\\""')
135        result = lexer.read_string('"')
136        self.assertEqual(result, 'hello "world"')

Test reading string with escape characters

def test_read_string_with_backslash_at_end(self):
138    def test_read_string_with_backslash_at_end(self):
139        """Test reading string with backslash at end"""
140        lexer = SCIMPathLexer('"hello\\"')
141        result = lexer.read_string('"')
142        self.assertEqual(result, 'hello"')

Test reading string with backslash at end

def test_read_string_unclosed(self):
144    def test_read_string_unclosed(self):
145        """Test reading unclosed string"""
146        lexer = SCIMPathLexer('"hello world')
147        result = lexer.read_string('"')
148        self.assertEqual(result, "hello world")
149        self.assertIsNone(lexer.current_char)

Test reading unclosed string

def test_read_string_empty(self):
151    def test_read_string_empty(self):
152        """Test reading empty string"""
153        lexer = SCIMPathLexer('""')
154        result = lexer.read_string('"')
155        self.assertEqual(result, "")

Test reading empty string

def test_read_number_integer(self):
157    def test_read_number_integer(self):
158        """Test reading integer number"""
159        lexer = SCIMPathLexer("123")
160        result = lexer.read_number()
161        self.assertEqual(result, "123")
162        self.assertIsNone(lexer.current_char)

Test reading integer number

def test_read_number_float(self):
164    def test_read_number_float(self):
165        """Test reading float number"""
166        lexer = SCIMPathLexer("123.456")
167        result = lexer.read_number()
168        self.assertEqual(result, "123.456")
169        self.assertIsNone(lexer.current_char)

Test reading float number

def test_read_number_with_multiple_dots(self):
171    def test_read_number_with_multiple_dots(self):
172        """Test reading number with multiple dots (invalid but handled)"""
173        lexer = SCIMPathLexer("123.456.789")
174        result = lexer.read_number()
175        self.assertEqual(result, "123.456.789")
176        self.assertIsNone(lexer.current_char)

Test reading number with multiple dots (invalid but handled)

def test_read_number_starting_with_dot(self):
178    def test_read_number_starting_with_dot(self):
179        """Test reading number starting with dot"""
180        lexer = SCIMPathLexer(".123")
181        result = lexer.read_number()
182        self.assertEqual(result, ".123")

Test reading number starting with dot

def test_read_identifier_simple(self):
184    def test_read_identifier_simple(self):
185        """Test reading simple identifier"""
186        lexer = SCIMPathLexer("userName")
187        result = lexer.read_identifier()
188        self.assertEqual(result, "userName")
189        self.assertIsNone(lexer.current_char)

Test reading simple identifier

def test_read_identifier_with_underscore(self):
191    def test_read_identifier_with_underscore(self):
192        """Test reading identifier with underscore"""
193        lexer = SCIMPathLexer("user_name")
194        result = lexer.read_identifier()
195        self.assertEqual(result, "user_name")

Test reading identifier with underscore

def test_read_identifier_with_hyphen(self):
197    def test_read_identifier_with_hyphen(self):
198        """Test reading identifier with hyphen"""
199        lexer = SCIMPathLexer("user-name")
200        result = lexer.read_identifier()
201        self.assertEqual(result, "user-name")

Test reading identifier with hyphen

def test_read_identifier_with_colon(self):
203    def test_read_identifier_with_colon(self):
204        """Test reading identifier with colon (URN format)"""
205        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
206        result = lexer.read_identifier()
207        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")

Test reading identifier with colon (URN format)

def test_read_identifier_schema_urn(self):
209    def test_read_identifier_schema_urn(self):
210        """Test reading schema URN identifier"""
211        lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")
212        result = lexer.read_identifier()
213        self.assertEqual(result, SCIM_URN_USER)
214        self.assertEqual(lexer.current_char, ".")  # Should stop at dot and set current_char to dot

Test reading schema URN identifier

def test_read_identifier_with_version_number(self):
216    def test_read_identifier_with_version_number(self):
217        """Test reading identifier with version number (dots followed by digits)"""
218        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:User")
219        result = lexer.read_identifier()
220        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:User")

Test reading identifier with version number (dots followed by digits)

def test_read_identifier_partial_urn_match(self):
222    def test_read_identifier_partial_urn_match(self):
223        """Test reading identifier that partially matches URN"""
224        lexer = SCIMPathLexer("urn:ietf:params:scim:schemas:core:2.0:CustomUser")
225        result = lexer.read_identifier()
226        self.assertEqual(result, "urn:ietf:params:scim:schemas:core:2.0:CustomUser")

Test reading identifier that partially matches URN

def test_get_next_token_dot(self):
229    def test_get_next_token_dot(self):
230        """Test tokenizing dot"""
231        lexer = SCIMPathLexer(".")
232        token = lexer.get_next_token()
233        self.assertEqual(token.type, TokenType.DOT)
234        self.assertEqual(token.value, ".")

Test tokenizing dot

def test_get_next_token_lbracket(self):
236    def test_get_next_token_lbracket(self):
237        """Test tokenizing left bracket"""
238        lexer = SCIMPathLexer("[")
239        token = lexer.get_next_token()
240        self.assertEqual(token.type, TokenType.LBRACKET)
241        self.assertEqual(token.value, "[")

Test tokenizing left bracket

def test_get_next_token_rbracket(self):
243    def test_get_next_token_rbracket(self):
244        """Test tokenizing right bracket"""
245        lexer = SCIMPathLexer("]")
246        token = lexer.get_next_token()
247        self.assertEqual(token.type, TokenType.RBRACKET)
248        self.assertEqual(token.value, "]")

Test tokenizing right bracket

def test_get_next_token_lparen(self):
250    def test_get_next_token_lparen(self):
251        """Test tokenizing left parenthesis"""
252        lexer = SCIMPathLexer("(")
253        token = lexer.get_next_token()
254        self.assertEqual(token.type, TokenType.LPAREN)
255        self.assertEqual(token.value, "(")

Test tokenizing left parenthesis

def test_get_next_token_rparen(self):
257    def test_get_next_token_rparen(self):
258        """Test tokenizing right parenthesis"""
259        lexer = SCIMPathLexer(")")
260        token = lexer.get_next_token()
261        self.assertEqual(token.type, TokenType.RPAREN)
262        self.assertEqual(token.value, ")")

Test tokenizing right parenthesis

def test_get_next_token_string_double_quotes(self):
264    def test_get_next_token_string_double_quotes(self):
265        """Test tokenizing double-quoted string"""
266        lexer = SCIMPathLexer('"test string"')
267        token = lexer.get_next_token()
268        self.assertEqual(token.type, TokenType.STRING)
269        self.assertEqual(token.value, "test string")

Test tokenizing double-quoted string

def test_get_next_token_string_single_quotes(self):
271    def test_get_next_token_string_single_quotes(self):
272        """Test tokenizing single-quoted string"""
273        lexer = SCIMPathLexer("'test string'")
274        token = lexer.get_next_token()
275        self.assertEqual(token.type, TokenType.STRING)
276        self.assertEqual(token.value, "test string")

Test tokenizing single-quoted string

def test_get_next_token_number_integer(self):
278    def test_get_next_token_number_integer(self):
279        """Test tokenizing integer"""
280        lexer = SCIMPathLexer("123")
281        token = lexer.get_next_token()
282        self.assertEqual(token.type, TokenType.NUMBER)
283        self.assertEqual(token.value, "123")

Test tokenizing integer

def test_get_next_token_number_float(self):
285    def test_get_next_token_number_float(self):
286        """Test tokenizing float"""
287        lexer = SCIMPathLexer("123.45")
288        token = lexer.get_next_token()
289        self.assertEqual(token.type, TokenType.NUMBER)
290        self.assertEqual(token.value, "123.45")

Test tokenizing float

def test_get_next_token_boolean_true(self):
292    def test_get_next_token_boolean_true(self):
293        """Test tokenizing boolean true"""
294        lexer = SCIMPathLexer("true")
295        token = lexer.get_next_token()
296        self.assertEqual(token.type, TokenType.BOOLEAN)
297        self.assertTrue(token.value)

Test tokenizing boolean true

def test_get_next_token_boolean_false(self):
299    def test_get_next_token_boolean_false(self):
300        """Test tokenizing boolean false"""
301        lexer = SCIMPathLexer("false")
302        token = lexer.get_next_token()
303        self.assertEqual(token.type, TokenType.BOOLEAN)
304        self.assertFalse(token.value)

Test tokenizing boolean false

def test_get_next_token_boolean_case_insensitive(self):
306    def test_get_next_token_boolean_case_insensitive(self):
307        """Test tokenizing boolean with different cases"""
308        for value in ["TRUE", "True", "FALSE", "False"]:
309            with self.subTest(value=value):
310                lexer = SCIMPathLexer(value)
311                token = lexer.get_next_token()
312                self.assertEqual(token.type, TokenType.BOOLEAN)

Test tokenizing boolean with different cases

def test_get_next_token_null(self):
314    def test_get_next_token_null(self):
315        """Test tokenizing null"""
316        lexer = SCIMPathLexer("null")
317        token = lexer.get_next_token()
318        self.assertEqual(token.type, TokenType.NULL)
319        self.assertIsNone(token.value)

Test tokenizing null

def test_get_next_token_null_case_insensitive(self):
321    def test_get_next_token_null_case_insensitive(self):
322        """Test tokenizing null with different cases"""
323        for value in ["NULL", "Null"]:
324            with self.subTest(value=value):
325                lexer = SCIMPathLexer(value)
326                token = lexer.get_next_token()
327                self.assertEqual(token.type, TokenType.NULL)

Test tokenizing null with different cases

def test_get_next_token_and(self):
329    def test_get_next_token_and(self):
330        """Test tokenizing AND operator"""
331        lexer = SCIMPathLexer("and")
332        token = lexer.get_next_token()
333        self.assertEqual(token.type, TokenType.AND)
334        self.assertEqual(token.value, "and")

Test tokenizing AND operator

def test_get_next_token_or(self):
336    def test_get_next_token_or(self):
337        """Test tokenizing OR operator"""
338        lexer = SCIMPathLexer("or")
339        token = lexer.get_next_token()
340        self.assertEqual(token.type, TokenType.OR)
341        self.assertEqual(token.value, "or")

Test tokenizing OR operator

def test_get_next_token_not(self):
343    def test_get_next_token_not(self):
344        """Test tokenizing NOT operator"""
345        lexer = SCIMPathLexer("not")
346        token = lexer.get_next_token()
347        self.assertEqual(token.type, TokenType.NOT)
348        self.assertEqual(token.value, "not")

Test tokenizing NOT operator

def test_get_next_token_operators(self):
350    def test_get_next_token_operators(self):
351        """Test tokenizing all comparison operators"""
352        operators = ["eq", "ne", "co", "sw", "ew", "gt", "lt", "ge", "le", "pr"]
353        for op in operators:
354            with self.subTest(operator=op):
355                lexer = SCIMPathLexer(op)
356                token = lexer.get_next_token()
357                self.assertEqual(token.type, TokenType.OPERATOR)
358                self.assertEqual(token.value, op)

Test tokenizing all comparison operators

def test_get_next_token_operators_case_insensitive(self):
360    def test_get_next_token_operators_case_insensitive(self):
361        """Test tokenizing operators with different cases"""
362        for op in ["EQ", "Eq", "NE", "Ne"]:
363            with self.subTest(operator=op):
364                lexer = SCIMPathLexer(op)
365                token = lexer.get_next_token()
366                self.assertEqual(token.type, TokenType.OPERATOR)
367                self.assertEqual(token.value, op.lower())

Test tokenizing operators with different cases

def test_get_next_token_attribute(self):
369    def test_get_next_token_attribute(self):
370        """Test tokenizing attribute name"""
371        lexer = SCIMPathLexer("userName")
372        token = lexer.get_next_token()
373        self.assertEqual(token.type, TokenType.ATTRIBUTE)
374        self.assertEqual(token.value, "userName")

Test tokenizing attribute name

def test_get_next_token_attribute_with_underscore(self):
376    def test_get_next_token_attribute_with_underscore(self):
377        """Test tokenizing attribute name with underscore"""
378        lexer = SCIMPathLexer("_userName")
379        token = lexer.get_next_token()
380        self.assertEqual(token.type, TokenType.ATTRIBUTE)
381        self.assertEqual(token.value, "_userName")

Test tokenizing attribute name with underscore

def test_get_next_token_eof(self):
383    def test_get_next_token_eof(self):
384        """Test tokenizing end of file"""
385        lexer = SCIMPathLexer("")
386        token = lexer.get_next_token()
387        self.assertEqual(token.type, TokenType.EOF)
388        self.assertEqual(token.value, "")

Test tokenizing end of file

def test_get_next_token_with_whitespace(self):
390    def test_get_next_token_with_whitespace(self):
391        """Test tokenizing with leading whitespace"""
392        lexer = SCIMPathLexer("   userName")
393        token = lexer.get_next_token()
394        self.assertEqual(token.type, TokenType.ATTRIBUTE)
395        self.assertEqual(token.value, "userName")

Test tokenizing with leading whitespace

def test_get_next_token_skip_unknown_characters(self):
397    def test_get_next_token_skip_unknown_characters(self):
398        """Test that unknown characters are skipped"""
399        lexer = SCIMPathLexer("@#$userName")
400        token = lexer.get_next_token()
401        self.assertEqual(token.type, TokenType.ATTRIBUTE)
402        self.assertEqual(token.value, "userName")

Test that unknown characters are skipped

def test_get_next_token_multiple_tokens(self):
404    def test_get_next_token_multiple_tokens(self):
405        """Test tokenizing multiple tokens in sequence"""
406        lexer = SCIMPathLexer("userName.givenName")
407
408        token1 = lexer.get_next_token()
409        self.assertEqual(token1.type, TokenType.ATTRIBUTE)
410        self.assertEqual(token1.value, "userName")
411
412        token2 = lexer.get_next_token()
413        self.assertEqual(token2.type, TokenType.DOT)
414        self.assertEqual(token2.value, ".")
415
416        token3 = lexer.get_next_token()
417        self.assertEqual(token3.type, TokenType.ATTRIBUTE)
418        self.assertEqual(token3.value, "givenName")
419
420        token4 = lexer.get_next_token()
421        self.assertEqual(token4.type, TokenType.EOF)

Test tokenizing multiple tokens in sequence

def test_get_next_token_complex_filter(self):
    """A bracketed filter with ``and`` produces the expected token types."""
    lexer = SCIMPathLexer('emails[type eq "work" and primary eq true]')

    # Drain the lexer; the terminating EOF token stays in the list.
    collected = [lexer.get_next_token()]
    while collected[-1].type != TokenType.EOF:
        collected.append(lexer.get_next_token())

    expected_types = [
        TokenType.ATTRIBUTE,  # emails
        TokenType.LBRACKET,  # [
        TokenType.ATTRIBUTE,  # type
        TokenType.OPERATOR,  # eq
        TokenType.STRING,  # "work"
        TokenType.AND,  # and
        TokenType.ATTRIBUTE,  # primary
        TokenType.OPERATOR,  # eq
        TokenType.BOOLEAN,  # true
        TokenType.RBRACKET,  # ]
        TokenType.EOF,
    ]

    # Check the count first, then compare the types pairwise.
    self.assertEqual(len(collected), len(expected_types))
    for actual, wanted in zip(collected, expected_types, strict=False):
        self.assertEqual(actual.type, wanted)

Test tokenizing complex filter expression

def test_get_next_token_urn_attribute(self):
    """A user-schema URN prefix is lexed as one attribute, then ``.userName``."""
    lexer = SCIMPathLexer(f"{SCIM_URN_USER}.userName")

    # The whole URN comes back as a single ATTRIBUTE token.
    urn_token = lexer.get_next_token()
    self.assertEqual(urn_token.type, TokenType.ATTRIBUTE)
    self.assertEqual(urn_token.value, SCIM_URN_USER)

    # Separator between the URN and the attribute name.
    separator = lexer.get_next_token()
    self.assertEqual(separator.type, TokenType.DOT)

    # Trailing attribute name.
    attribute = lexer.get_next_token()
    self.assertEqual(attribute.type, TokenType.ATTRIBUTE)
    self.assertEqual(attribute.value, "userName")

Test tokenizing URN-based attribute

def test_get_next_token_enterprise_urn(self):
    """The enterprise-extension URN is lexed as one attribute followed by a dot."""
    lexer = SCIMPathLexer(f"{SCIM_URN_USER_ENTERPRISE}.manager")

    # The whole URN comes back as a single ATTRIBUTE token.
    urn_token = lexer.get_next_token()
    self.assertEqual(urn_token.type, TokenType.ATTRIBUTE)
    self.assertEqual(urn_token.value, SCIM_URN_USER_ENTERPRISE)

    # Only the separator after the URN is checked; nothing past it is asserted.
    separator = lexer.get_next_token()
    self.assertEqual(separator.type, TokenType.DOT)

Test tokenizing enterprise URN

def test_lexer_state_after_eof(self):
    """Once input is exhausted, every further call keeps returning EOF."""
    lexer = SCIMPathLexer("a")

    # The single-character path yields one attribute token.
    only_token = lexer.get_next_token()
    self.assertEqual(only_token.type, TokenType.ATTRIBUTE)

    # The first read reaches EOF; the second confirms EOF is repeated.
    for _ in range(2):
        exhausted = lexer.get_next_token()
        self.assertEqual(exhausted.type, TokenType.EOF)

Test lexer state after reaching EOF

def test_read_identifier_edge_cases(self):
    """read_identifier returns the full input for the edge cases below."""
    edge_cases = (
        "test:",  # identifier ending with a colon
        "test123",  # identifier containing digits
    )
    for raw in edge_cases:
        identifier = SCIMPathLexer(raw).read_identifier()
        self.assertEqual(identifier, raw)

Test read_identifier with edge cases

def test_complex_urn_parsing(self):
    """A URN containing a ``2.0`` version segment is read as one identifier."""
    versioned_urn = "urn:ietf:params:scim:schemas:extension:enterprise:2.0:User"
    identifier = SCIMPathLexer(versioned_urn).read_identifier()
    self.assertEqual(identifier, versioned_urn)

Test parsing complex URN with version numbers