# regex
"""
Docs for RegexGenerator
"""
import re
from typing import Optional, Tuple

try:
    # ``Self`` ships with the standard library on Python 3.11 and newer.
    from typing import Self
except ImportError:
    from typing_extensions import Self


class RegexGen:
    r"""Fluent builder that assembles a regular-expression string step by step.

    Instance methods append a fragment to an internal buffer and return the
    instance so that calls can be chained.  Static methods return plain
    strings (or tuples) meant to be handed to the instance methods.

    Example::

        gen = RegexGen().linestartwith().text("foo", 1, 1).any(1, 5)
        gen.text("bar", 1, 1).endofline().get_regex_data()
        # -> ^foo.{1,5}bar$
    """
    # ranges
    lowercaserange: str = "[a-z]"
    uppercaserange: str = "[A-Z]"
    digitsrange: str = "[0-9]"
    symbolsrange: str = r"\W"
    alphanumeric: str = r"\w"
    # escape sequences
    # Raw string on purpose: a plain "\b" is the backspace character, not the
    # regex word-boundary assertion.
    block_word: str = r"\b"
    nonblock_word: str = r"\B"
    new_line: str = "\n"
    tab_space: str = "\t"
    carriage_return: str = "\r"
    whitespace: str = r"\s"

    # Characters that `characters` prefixes with a backslash.
    _SPECIAL_CHARACTERS = frozenset("\\.()*{}^+?[]$|")

    def __init__(self):
        # Accumulated regex source; every builder method appends to it.
        self.__regex_data: str = str()

    def linestartwith(self) -> Self:
        r"""Append a start-of-line anchor and return the builder.

        Appends ``^`` when the buffer is empty; otherwise appends a newline
        character followed by ``^`` so the anchor starts a new line.

        Example::

            RegexGen().linestartwith().text("foo", 1, 1).get_regex_data()
            # -> ^foo
        """
        self.__regex_data += '\n^' if self.__regex_data else '^'
        return self

    def endofline(self) -> Self:
        """Append the end-of-line anchor ``$`` and return the builder.

        Example::

            RegexGen().text("abc", 1, 1).endofline().get_regex_data()
            # -> abc$
        """
        self.__regex_data += '$'
        return self

    @staticmethod
    def range(start: str, end: str) -> str:
        """Build a character range from two single characters.

        Args:
            start: first character of the range; must have length 1.
            end: last character of the range; must have length 1.

        Returns:
            The string ``(start-end)`` when the range helper accepts it.

        Raises:
            ValueError: if either bound is empty or longer than one
                character, or if the range is rejected as invalid.
        """
        # Either bound being missing or multi-character makes the range invalid.
        if not start or not end or len(start) != 1 or len(end) != 1:
            raise ValueError("In function {}, range_start : {}, range_end:{} => Characters cannot be None".format(
                RegexGen.range.__name__, start, end))

        # NOTE(review): the range is wrapped in parentheses, not square
        # brackets, exactly as before - confirm this is what `valid_ranges`
        # and the callers expect.
        # NOTE(review): `valid_ranges` and the `is_*` predicates are not
        # defined in this part of the file; presumably they live elsewhere.
        character_range = f"({start}-{end})"
        if valid_ranges(character_range, is_lower_case, is_upper_case, is_number):
            return character_range
        raise ValueError("In function {}, range_start : {}, range_end:{} => This is not a valid range. Valid ranges are 0-9,A-Z or a-z or \\W".format(
            RegexGen.range.__name__, start, end))

    @staticmethod
    def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]:
        r"""Describe characters that `digits` / `alphabets` must not match.

        Args:
            characters: the characters (or range) to exclude.
            pattern_prevent: when True the characters are excluded as one
                sequence; when False each character is excluded on its own.
                Forced to False when ``characters`` is a valid range or
                contains ``\W``.

        Returns:
            The tuple ``(characters, pattern_prevent)``, suitable as the
            ``pattern`` argument of `digits` and `alphabets`.

        Raises:
            ValueError: if ``characters`` is empty.
        """
        if not characters:
            raise ValueError("In function {}, Character : {} => Characters cannot be None".format(
                RegexGen.exclude.__name__, characters))

        # A range can only be excluded piecewise, never as a literal sequence.
        # NOTE(review): `valid_ranges` is defined outside this section.
        if (valid_ranges(characters, is_lower_case, is_upper_case, is_number)
                or RegexGen.symbolsrange in characters):
            pattern_prevent = False

        return characters, pattern_prevent

    @staticmethod
    def boundary_character(character: str, start: bool = True) -> str:
        r"""Attach a word-boundary assertion ``\b`` to a character.

        Args:
            character: a single character, or one of the two-character
                classes ``\w``, ``\W``, ``\d``, ``\.``.
            start: when True the boundary is placed before the character
                (left boundary of a word); when False it is placed after it.

        Returns:
            ``\b`` + character, or character + ``\b``.

        Raises:
            ValueError: if ``character`` is longer than two characters, or is
                two characters long but not one of the accepted classes.
        """
        if len(character) > 2:
            raise ValueError("In function {}, character : {} => Character cannot be length greater than two".format(
                RegexGen.boundary_character.__name__, character))
        if len(character) == 2 and character not in {r"\w", r"\W", r"\d", r"\."}:
            raise ValueError("In function {}, character : {} => Character is not a \\w or \\W or \\d or \\.".format(
                RegexGen.boundary_character.__name__, character))

        # Raw strings: "\b" without the r-prefix would be a backspace character.
        return rf"\b{character}" if start else rf"{character}\b"

    def add_quantifier(self, min: int, max: int, **kwargs) -> str:
        """Translate a ``min`` / ``max`` pair into a regex quantifier.

        ================================  ===============
        input                             result
        ================================  ===============
        min == max == 0, zeroormore=True  ``*``
        min == max == 0, oneormore=True   ``+``
        min == max == 1                   empty string
        min == max == n                   ``{n}``
        min == 0, max == 1                ``?``
        min == n > 0, max == 0            ``{n,}``
        0 < min < max                     ``{min,max}``
        min == 0, max > 1                 ``{,max}``
        ================================  ===============

        Args:
            min: minimum number of repetitions.
            max: maximum number of repetitions; 0 means "no upper bound" when
                ``min`` is positive.
            **kwargs: ``zeroormore`` / ``oneormore`` booleans, consulted only
                when ``min`` and ``max`` are both 0.

        Returns:
            The quantifier string (possibly empty).

        Raises:
            ValueError: if both bounds are 0 and no keyword flag is set, if a
                bound is negative, or if ``min`` exceeds a non-zero ``max``.
        """
        if min < 0 or max < 0:
            raise ValueError("In function {} => Min And Max Cannot be Negative"
                             .format(self.add_quantifier.__name__))

        if min == 0 and max == 0:
            if kwargs.get("zeroormore", False):
                return '*'
            if kwargs.get("oneormore", False):
                return '+'
            raise ValueError("In function {} => Min And Max Cannot be Zero"
                             .format(self.add_quantifier.__name__))

        if min == max:
            # Exactly one repetition needs no quantifier at all.
            return "" if min == 1 else f"{{{min}}}"
        if min == 0 and max == 1:
            return "?"
        if max == 0:
            # Positive min with max == 0 means "at least min".
            return f"{{{min},}}"
        if max > min:
            return f"{{{min},{max}}}" if min > 0 else f"{{,{max}}}"

        # Only min > max > 0 is left, which describes no valid repetition.
        raise ValueError("In function {} => Min : {} Cannot be Greater than Max : {}"
                         .format(self.add_quantifier.__name__, min, max))

    # The builder methods historically refer to the quantifier helper by its
    # private (name-mangled) spelling; keep both names bound to one function.
    __add_quantifier = add_quantifier

    @staticmethod
    def _class_block(atom: str, quantifier: str,
                     pattern: Optional[Tuple[str, bool]], capture: bool) -> str:
        """Build ``atom`` + quantifier, optionally guarded by a negative lookahead."""
        if pattern is None:
            block = f"{atom}{quantifier}"
            return f"({block})" if capture else block

        # pattern[1] True -> exclude the whole sequence; False -> exclude each
        # character individually via a character class.
        lookahead = f"(?!{pattern[0]})" if pattern[1] else f"(?![{pattern[0]}])"
        opener = "(" if capture else "(?:"
        return f"{opener}{lookahead}{atom}){quantifier}"

    def text(self, character: str, min: int = 0, max: int = 0, **kwargs) -> Self:
        """Append literal text followed by a quantifier.

        The text is appended verbatim; use `characters` first to escape regex
        metacharacters.  The quantifier is placed directly after the text, so
        for multi-character text it binds to the last character only.

        Args:
            character: the text to append; may be a word, a single character,
                a digit, a symbol or a range.
            min: minimum repetitions (see `add_quantifier`).
            max: maximum repetitions (see `add_quantifier`).
            **kwargs: ``zeroormore`` / ``oneormore``; one of them must be True
                when ``min`` and ``max`` are both 0.

        Returns:
            This builder.

        Raises:
            ValueError: if ``character`` is empty or the quantifier is invalid.

        Example::

            RegexGen().text("This is a text", 1, 1).get_regex_data()
            # -> This is a text
        """
        if not character:
            raise ValueError("In function {}, Character : {} => Character cannot be None".format(
                self.text.__name__, character))

        # Resolve the quantifier first so a failure leaves the buffer untouched.
        quantifier = self.add_quantifier(min, max, **kwargs)
        self.__regex_data += character + quantifier
        return self

    def any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:
        """Append the any-character token ``.`` followed by a quantifier.

        Args:
            min: minimum repetitions (see `add_quantifier`).
            max: maximum repetitions (see `add_quantifier`).
            capture: when True the token and its quantifier are enclosed in a
                capturing group.
            **kwargs: ``zeroormore`` / ``oneormore``; one of them must be True
                when ``min`` and ``max`` are both 0.

        Returns:
            This builder.

        Raises:
            ValueError: if the quantifier is invalid.

        Example::

            RegexGen().any(min=0, max=12).get_regex_data()
            # -> .{,12}
        """
        quantifier = self.add_quantifier(min, max, **kwargs)
        self.__regex_data += f"(.{quantifier})" if capture else f".{quantifier}"
        return self

    def digits(self, min: int = 0, max: int = 0, pattern: Optional[Tuple[str, bool]] = None, capture: bool = False, **kwargs) -> Self:
        r"""Append a digit matcher ``\d`` with a quantifier.

        Args:
            min: minimum repetitions (see `add_quantifier`).
            max: maximum repetitions (see `add_quantifier`).
            pattern: optional ``(characters, as_sequence)`` tuple, normally
                the return value of `exclude`; adds a negative lookahead
                in front of every digit.
            capture: when True a capturing group is used instead of a
                non-capturing one.
            **kwargs: ``zeroormore`` / ``oneormore``; one of them must be True
                when ``min`` and ``max`` are both 0.

        Returns:
            This builder.

        Raises:
            ValueError: if the quantifier is invalid.

        Example::

            RegexGen().digits(1, 10, ("23", False)).get_regex_data()
            # -> (?:(?![23])\d){1,10}
        """
        # NOTE(review): with a pattern the quantifier sits outside the group,
        # so a capturing group holds only the last repetition.
        quantifier = self.add_quantifier(min, max, **kwargs)
        self.__regex_data += RegexGen._class_block(r"\d", quantifier, pattern, capture)
        return self

    def alphabets(self, min: int = 0, max: int = 0, pattern: Optional[Tuple[str, bool]] = None, capture: bool = False, **kwargs) -> Self:
        """Append a letter matcher ``[a-zA-Z]`` with a quantifier.

        Args:
            min: minimum repetitions (see `add_quantifier`).
            max: maximum repetitions (see `add_quantifier`).
            pattern: optional ``(characters, as_sequence)`` tuple, normally
                the return value of `exclude`; adds a negative lookahead
                in front of every letter.
            capture: when True a capturing group is used instead of a
                non-capturing one.
            **kwargs: ``zeroormore`` / ``oneormore``; one of them must be True
                when ``min`` and ``max`` are both 0.

        Returns:
            This builder.

        Raises:
            ValueError: if the quantifier is invalid.

        Example::

            RegexGen().alphabets(1, 5).get_regex_data()
            # -> [a-zA-Z]{1,5}
        """
        # NOTE(review): with a pattern the quantifier sits outside the group,
        # so a capturing group holds only the last repetition.
        quantifier = self.add_quantifier(min, max, **kwargs)
        self.__regex_data += RegexGen._class_block("[a-zA-Z]", quantifier, pattern, capture)
        return self

    def get_non_capturing_regex(self) -> str:
        """Return the accumulated expression wrapped in ``(?:...)``.

        Only the outer wrapper is non-capturing; capturing groups already
        present inside the expression are returned unchanged.
        """
        return f"(?:{self.__regex_data})"

    def get_regex_data(self) -> str:
        """Return the accumulated expression exactly as it was built."""
        return self.__regex_data

    def combine(self, regex: Self) -> Self:
        r"""Append another builder's expression to this one.

        Args:
            regex: a `RegexGen` whose accumulated expression is appended.

        Returns:
            This builder.

        Raises:
            ValueError: if ``regex`` holds an empty expression.

        Example::

            left = RegexGen().digits(4, 4).text(RegexGen.characters("-"), 1, 1)
            right = RegexGen().digits(3, 3)
            left.combine(right).get_regex_data()
            # -> \d{4}-\d{3}
        """
        if not regex.__regex_data:
            raise ValueError("Invalid regex to combine")

        self.__regex_data += regex.__regex_data
        return self

    @staticmethod
    def any_of(characters: str, capture: bool = False, **kwargs) -> str:
        """Build a character class matching any one of ``characters``.

        Args:
            characters: the characters (ASCII only) placed inside ``[...]``.
            capture: when True the class is enclosed in a capturing group,
                otherwise in a non-capturing group.
            **kwargs: accepted for signature compatibility; not used.

        Returns:
            ``([characters])`` or ``(?:[characters])``.

        Raises:
            ValueError: if ``characters`` is empty or contains a non-ASCII
                character.
        """
        if not characters:
            # "[]" is not a valid character class.
            raise ValueError("In function {}, character : {} => Input cannot be none ".format(
                RegexGen.any_of.__name__, characters))

        # NOTE(review): the result was never used here; the call is kept only
        # in case `valid_ranges` (defined outside this section) validates by
        # raising - confirm and drop it if it has no such effect.
        valid_ranges(characters, is_number, is_lower_case, is_upper_case)

        for character in characters:
            if not character.isascii():
                raise ValueError("In function {}, character : {} => Non ascii character is not acceptable".format(
                    RegexGen.any_of.__name__, character))
        return f"([{characters}])" if capture else f"(?:[{characters}])"

    @staticmethod
    def characters(char: str) -> str:
        r"""Escape regex metacharacters so the text is matched literally.

        The characters ``\ . ( ) * { } ^ + ? [ ] $ |`` are prefixed with a
        backslash; everything else is copied unchanged.  The result is meant
        to be passed to `text`.

        Args:
            char: the text to escape.

        Returns:
            The escaped text.

        Raises:
            ValueError: if ``char`` is empty.

        Example::

            RegexGen.characters("This+is{a$text.")
            # -> This\+is\{a\$text\.
        """
        if not char:
            raise ValueError("In function {}, character : {} => Input cannot be none ".format(
                RegexGen.characters.__name__, char))

        special = RegexGen._SPECIAL_CHARACTERS
        return "".join(f"\\{lettr}" if lettr in special else lettr for lettr in char)

    def succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
        """Append a block that must (or must not) be followed by another one.

        Each tuple is ``(characters, as_sequence)``: with ``as_sequence`` True
        the characters are used verbatim, otherwise they are wrapped in a
        character class ``[...]``.

        Args:
            preceeding: the block to match; the quantifier is applied to it.
            succeeding: the block placed in a lookahead after ``preceeding``.
            min: minimum repetitions (see `add_quantifier`).
            max: maximum repetitions (see `add_quantifier`).
            capture: when True the new group is capturing.
            invert: when True a negative lookahead ``(?!...)`` is used
                instead of a positive one ``(?=...)``.
            **kwargs: ``zeroormore`` / ``oneormore``; one of them must be True
                when ``min`` and ``max`` are both 0.

        Returns:
            This builder.

        Raises:
            ValueError: if either tuple is empty or not of length 2, or if
                the quantifier is invalid.

        Example::

            RegexGen().succeeded_by(("a", True), ("b", True), 1, 1).get_regex_data()
            # -> (?:a(?=b))
        """
        if not preceeding or len(preceeding) != 2:
            raise ValueError("In function {} => preceeding tuple cannot be none or its length must be 2".format(
                RegexGen.succeeded_by.__name__))
        if not succeeding or len(succeeding) != 2:
            raise ValueError("In function {} => succeeding tuple cannot be none or its length must be 2".format(
                RegexGen.succeeded_by.__name__))

        quantifier = self.add_quantifier(min, max, **kwargs)

        lookahead_open = "(?!" if invert else "(?="
        follow_body = succeeding[0] if succeeding[1] else f"[{succeeding[0]}]"
        followblock = f"{lookahead_open}{follow_body})"

        precedingblock = (f"{preceeding[0]}{quantifier}" if preceeding[1]
                          else f"[{preceeding[0]}]{quantifier}")

        current = self.__regex_data
        # When the buffer already ends with "<precedingblock>)", the lookahead
        # is spliced into that existing group instead of opening a new one.
        # `rfind` (not `rindex`) so a missing block simply selects the other
        # branch; the closing parenthesis is checked before it is replaced.
        if (len(current) > len(precedingblock)
                and current.endswith(")")
                and current.rfind(precedingblock) == len(current) - len(precedingblock) - 1):
            self.__regex_data = current[:-1] + followblock + ")"
        else:
            block = precedingblock + followblock
            self.__regex_data += f"({block})" if capture else f"(?:{block})"
        return self

    # NOTE(review): the original listing continues here with
    #   def preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool],
    #                   min: int = 0, max: int = 0, capture: bool = False,
    #                   invert: bool = False, **kwargs) -> Self:
    # Its docstring and body lie beyond this section, so the definition is not
    # reproduced or modified here.
533 This function is used to match pattern that is preceded by another pattern.<br> 534 If the pattern of the succeeded_by and preceeded_by matches the combination is union. <br> 535 In the function definition, <code>preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>, 536 <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li> 537 <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 538 <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 539 <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 540 <li>The return returns RegexGen.</li></ol> 541 ''' 542 if not preceding or len(preceding) != 2: 543 raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format( 544 RegexGen.preceded_by.__name__)) 545 if not succeeding or len(succeeding) != 2: 546 raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format( 547 RegexGen.preceded_by.__name__)) 548 549 characterstr: str = str() 550 temp: str = str() 551 552 try: 553 temp = self.__add_quantifier(min, max, **kwargs) 554 except (...): 555 raise 556 557 preceedingblock: str = str() 558 if invert: 559 preceedingblock = f"(?<!{preceding[0]})" if preceding[1] else f"(?<![{preceding[0]}])" 560 else: 561 preceedingblock = f"(?<={preceding[0]})" if preceding[1] else f"(?<=[{preceding[0]}])" 562 563 followblock: str = f"{succeeding[0]}{temp}" if succeeding[1] else f"[{succeeding[0]}]{temp}" 564 characterstr = preceedingblock + followblock 565 characterstr = f"({characterstr})" if capture else f"(?:{characterstr})" 566 self.__regex_data += characterstr 567 return self 568 
569 # @staticmethod 570 # def any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str: 571 # ''' 572 # This function is any_of_the_block with quantifiers or this function defines repetition of words in the list. <br> 573 # In the function definition, <code>any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str:</code>, 574 # <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li> 575 # <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 576 # <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 577 # <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 578 # <li>The return returns RegexGen.</li></ol> 579 # ''' 580 # character_str = str() 581 # tempstr = str() 582 583 # if not len(characters): 584 # return "" 585 586 # character_pair = list() 587 # for index, listitem in enumerate(characters): 588 # character = listitem.pop("character", None) 589 # min = listitem.pop("min", 0) 590 # max = listitem.pop("max", 0) 591 # if character is None: 592 # raise Exception("In function {}, at index {} doesn't have character pair.".format( 593 # RegexGen.any_of.__name__, index)) 594 # if len(character) == 0: 595 # continue 596 # elif len(character) == 1 or (len(character) == 2 and character in {"\s", "\d", "\w", "\W"}): 597 # pass 598 # elif len(character) == 3 and valid_ranges(character, is_lower_case, is_number, is_upper_case): 599 # pass 600 # else: 601 # raise Exception("In function {}, at index {}, Unknown Character: {}.".format( 602 # RegexGen.any_of.__name__, index, character)) 603 # tempstr = RegexGen.__add_quantifier(min=min, max=max, **listitem) 604 # character_pair.append(character+tempstr) 605 606 # character_str = "|".join(character_pair) 607 # return f"({character_str})" 
if capture else f"(?:{character_str})"
10class RegexGen: 11 """ 12 Start of the class 13 """ 14 # ranges 15 lowercaserange: str = "[a-z]" 16 uppercaserange: str = "[A-Z]" 17 digitsrange: str = "[0-9]" 18 symbolsrange: str = "\W" 19 alphanumeric: str = "\w" 20 # ecape sequences 21 block_word: str = "\b" 22 nonblock_word: str = "\B" 23 new_line: str = "\n" 24 tab_space: str = "\t" 25 carriage_return: str = "\r" 26 whitespace = "\s" 27 28 def __init__(self): 29 self.__regex_data: str = str() 30 31 32 33 def linestartwith(self): 34 ''' 35 <code>linestartwith</code> adds a expression to indicate beginning of the string and is always added on a new line.<br></p> 36 The function definition is: 37 `linestartwith(self)` 38 <p>When this function is called, the function adds the expression '^' if no regex data exists already.</p> 39 <p>If the regex data already exists then the function adds the expression '\\n^' indicating line is changed before signifying beginning of the string. </p> 40 <p><code>Linestartwith</code> function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.<br> 41 </p> 42 ``` 43 regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline() 44 ``` 45 <p>This code checks for text starting with 'foo' and starting with 'bar' after any characters of length of min 1 and max 5 digits.</p> 46 <p>The regex is displayed as:</p> 47 <samp> regex = "^foo.{1,5}bar"</samp></p> 48 ^ character symbolizes the start of the string or line. 49 ''' 50 if not len(self.__regex_data): 51 self.__regex_data += '^' 52 else: 53 self.__regex_data += '\n^' 54 return self 55 56 57 58 def endofline(self): 59 ''' 60 <p><code>endofline</code> adds a expression to indicates end of the string and end of the line.<br> 61 The function definition is: 62 <code> 63 endofline(self) 64 </code> 65 <p>When this function is called, the function adds the expression '$' to the regex data. 
66 <p>If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string. 67 <p><code>endofline</code> function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.<br> 68 <pre><code><p>regex = RegexGen().regex.text('abc', 1, 1).endofline()</p></code></pre> 69 <p>This code checks for text ending with 'abc' after any characters of any length 70 <p>The regex is displayed as:<br> 71 <samp> regex = "abc$"</samp><br> 72 $ character symbolizes as end of a line 73 </p></p></p></p></p> 74 ''' 75 self.__regex_data += '$' 76 return self 77 78 @staticmethod 79 def range(start: str, end: str) -> str: 80 ''' 81 <code>Range</code> function provides syntax for defining range.<br><br> 82 In the function definition, 83 <code>range(start: str, end: str)</code> <br> 84 1. start accepts string for starting range whose length must be 1. 85 2. end accepts string for ending range whose length must be 1. 86 3. return returns the range in format <start>-<end> 87 88 <pre><code><p>regex = RegexGen().regex.range('a', 'z')</p></code></pre> 89 <p>The regex is displayed as:</p> 90 <samp> regex = "[a-z]"</samp><br> 91 Range for symbols will throw an error 92 start : str // length must be 1 93 end : str //length must be 1 94 return : str //returns the range in format <start>-<end> 95 ''' 96 if (not start and not end) and (len(start) > 1 and len(end) > 1): 97 raise Exception("In function {}, range_start : {}, range_end:{} => Characters cannot be None".format( 98 RegexGen.range.__name__, start, end)) 99 100 # check if range is valid 101 character_range = f"({start}-{end})" 102 if (valid_ranges(character_range, is_lower_case, is_upper_case, is_number)): 103 return character_range 104 raise Exception("In function {}, range_start : {}, range_end:{} => This is not a valid range. 
Valid ranges are 0-9,A-Z or a-z or \W".format( 105 RegexGen.range.__name__, start, end)) 106 107 108 @staticmethod 109 def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]: 110 ''' 111 <code>Exclude</code> function is a static function. It excludes certain pattern based upon the input of the user.<br> 112 In the function definition, <code>exclude(characters: str, pattern_prevent: bool = False) -> tuple</code>, 113 <ol><li> the characters : str signifies characters to be matched,</li> 114 <li>pattern_prevent : str (default = False) Here, on True, prevents the characters sequence to match(The sequence must not contain a range) 115 and on false prevent piecewise occuring of characters. 116 and returns a tuple </li> 117 118 119 <pre><code><p>RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True) </p></code></pre> 120 121 <p>The regex is displayed as:<br> 122 <samp> regex = "\b(?:(?![23])\d)+\b"</samp><br> 123 characters : str //characters to be matched 124 pattern_prevent : bool => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range) 125 //and on false prevent piecewise occuring of characters. 
126 return : tuple 127 ''' 128 if not characters: 129 raise Exception("In function {}, Character : {} => Characters cannot be None".format( 130 RegexGen.exclude.__name__, characters)) 131 # check if charaters is a range 132 try: 133 if valid_ranges(characters, is_lower_case, is_upper_case, is_number) or characters.find(RegexGen.symbolsrange) != -1: 134 pattern_prevent = False 135 except (...): 136 raise 137 138 return characters, pattern_prevent 139 140 @staticmethod 141 def boundary_character(character: str, start: bool = True) -> str: 142 ''' 143 <code>Boundary character</code> gives left or right boundary of the word as required.<br> 144 In the function definition, <code> boundary_character(character: str, start: bool = True) -> str: </code><br> 145 <ol> 146 <li> character: str signifies characters to be matched</li> 147 <li>start : bool (default = True) On true, the letter is the left boundary of the word<br> 148 and on false the letter is the right boundary of the word. 149 ''' 150 if len(character) > 2: 151 raise Exception("In function {}, start : {} => Character cannot be length greater than two", 152 RegexGen.boundary_character.__name__, start) 153 elif len(character) == 2 and character not in {"\w", "\W", "\d", "\."}: 154 raise Exception("In function {}, start : {} => Character is not a \w or \W or \d or \.", 155 RegexGen.boundary_character.__name__, start) 156 157 character_str = "\b" + character if start else character + "\b" 158 159 return character_str 160 161 162 def add_quantifier(self, min: int, max: int, **kwargs) -> str: 163 """ 164 <code>__add_quantifier</code>adds quantifiers like ? + * x(n,m).<br> 165 The function definition is: <code>__add_quantifier(self, min: int, max: int, **kwargs)</code>.<br> 166 The regex generated depends on the value of min and max. 167 <ol> 168 <li>min == max and max == 0:<br> 169 If no characters exist then the exception is raised stating min and max can't be zero. 
170 <pre> regex = " "</pre> </li> 171 <li>max == min and min == 1: 172 <pre> regex = "^foo.{1,5}bar"</pre></li> 173 <li>max == min: 174 <pre> regex = "^foo.{1,5}bar"</pre></li> 175 <li>min == 0 and max == 1: 176 <pre> regex = "^foo.{1,5}bar"</pre></li> 177 <>max == 0 and min > 0: 178 <pre> regex = "^foo.{1,5}bar"</pre></li> 179 <li>max > min and min > 0: 180 <pre> regex = "^foo.{1,5}bar"</pre></li> 181 <li>Else: 182 If no characters exist then the exception is raised stating min and max can't be zero.</li> 183 </ol> 184 <pre> regex = "^foo.{1,5}bar"</pre> 185 Add Quantifiers like {0},{0,1},?,*,+,{0,1} 186 """ 187 regexchar: str = str() 188 189 if min == max and max == 0: 190 zeroormore = kwargs.get("zeroormore", False) 191 oneormore = kwargs.get("oneormore", False) 192 if zeroormore: 193 regexchar += '*' 194 elif oneormore: 195 regexchar += '+' 196 else: 197 raise Exception("In function {} => Min And Max Cannot be Zero" 198 .format(self.__add_quantifier.__name__)) 199 elif max == min and min == 1: 200 regexchar = "" 201 elif max == min: 202 regexchar = f"{{{min}}}" 203 elif min == 0 and max == 1: 204 regexchar = "?" 205 elif max == 0 and min > 0: 206 regexchar = f"{{{min},}}" 207 elif max > min and min > 0: 208 regexchar = f"{{{min},{max}}}" 209 else: 210 regexchar = f"{{,{max}}}" 211 212 return regexchar 213 214 def text(self, character: str, min: int = 0, max: int = 0, **kwargs) -> Self: 215 ''' 216 <p><code>Text</code> function simply adds the input to regex syntax.</p> 217 <pre><code><p>RegexGen.text("This is a text.") </p></code></pre> 218 219 <p>The regex is displayed as:</p> 220 <pre> regex = "This is a text."</pre> 221 Text is generated using Characters function. 
222 character : str // A character can be a word, alphabet, a digit or number and symbols or a range 223 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 224 max : int => default = 0 225 capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data 226 kwargs : dict => { 227 zeroormore : bool => default=False, 228 oneormore : bool => default=False 229 } 230 return : RegexGen 231 ''' 232 letterstr: str = str() 233 temp: str = str() 234 235 if not character: 236 raise Exception("In function {}, Character : {} => Character cannot be None".format( 237 self.text.__name__, character)) 238 239 letterstr = character 240 241 try: 242 temp = self.__add_quantifier(min, max, **kwargs) 243 except Exception as e: 244 raise 245 246 self.__regex_data += letterstr 247 self.__regex_data += temp 248 249 return self 250 251 def any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self: 252 ''' 253 <p><code>Any</code> function generates a regex which can be utilized to check if a certain character exists in the expression<br> 254 In the function definition, 255 <code>any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:</code> 256 , 257 <ol class="list-group list-group-numbered"> 258 <li class="list-group-item d-flex justify-content-between align-items-start"> 259 <div class="ms-2 me-auto"> 260 the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br> 261 </div></li> 262 <li class="list-group-item d-flex justify-content-between align-items-start"> 263 <div class="ms-2 me-auto"> 264 If on capture : bool (default=False) True is passed, it enclose . in parenthesis so that regex engine capture data. 
<br> 265 </div></li> 266 <li class="list-group-item d-flex justify-content-between align-items-start"> 267 <div class="ms-2 me-auto"> 268 The kwargs : dict accepts {<br> 269 zeroormore : bool (default=False), <br> 270 oneormore : bool (default=False) 271 </div></li> 272 <li class="list-group-item d-flex justify-content-between align-items-start"> 273 <div class="ms-2 me-auto"> 274 The return returns RegexGen 275 </div></li></ol><br></p> 276 <pre><code>regex = RegexGen() 277 regex = regex.any(min=0, max=12)</code></pre> 278 279 <p>The regex is displayed as:</p> 280 <pre> regex = ".{0,12}"</pre> 281 . character symbolizes any character 282 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 283 max : int => default = 0 284 capture : bool => default=False //On True enclose . in parenthesis so that regex engine capture data 285 kwargs : dict => { 286 zeroormore : bool => default=False, 287 oneormore : bool => default=False 288 } 289 return : RegexGen 290 ''' 291 anystr: str = str() 292 temp: str = str() 293 294 try: 295 temp = self.__add_quantifier(min, max, **kwargs) 296 except (...): 297 raise 298 299 anystr = f"(.{temp})" if capture else f".{temp}" 300 self.__regex_data += anystr 301 302 return self 303 304 305 def digits(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self: 306 ''' 307 This function is used to match only numbers that may not contain a sequence of number or the each numbers existing independently. 
308 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 309 max : int => default = 0 310 pattern : a tuple[str, bool] expected a return type from exclude static function 311 capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data 312 kwargs : dict => { 313 zeroormore : bool => default=False, 314 oneormore : bool => default=False 315 } 316 return : RegexGen 317 ''' 318 digitstr: str = str() 319 temp: str = str() 320 321 try: 322 temp = self.__add_quantifier(min, max, **kwargs) 323 except (...): 324 raise 325 326 if pattern is None: 327 digitstr = f"(\d{temp})" if capture else f"\d{temp}" 328 elif pattern[1]: 329 digitstr = f"((?!{pattern[0]})\d){temp}" if capture else f"(?:(?!{pattern[0]})\d){temp}" 330 else: 331 digitstr = f"((?![{pattern[0]}])\d){temp}" if capture else f"(?:(?![{pattern[0]}])\d){temp}" 332 333 self.__regex_data += digitstr 334 335 return self 336 337 def alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self: 338 ''' 339 <p><code>Alphabets</code> function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.<br> 340 </p> 341 In the function definition, 342 <code>alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs)</code> 343 , <br><br> 344 <ol class="list-group list-group-numbered"> 345 <li class="list-group-item d-flex justify-content-between align-items-start"> 346 <div class="ms-2 me-auto"> 347 the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br> 348 </div></li> 349 <li class="list-group-item d-flex justify-content-between align-items-start"> 350 <div class="ms-2 me-auto"> 351 In pattern, a tuple[str, bool] is expected as a return type from exclude static function. 
<br> 352 </div></li> 353 <li class="list-group-item d-flex justify-content-between align-items-start"> 354 <div class="ms-2 me-auto"> 355 If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that regex engine captures data.<br> 356 </div></li> 357 <li class="list-group-item d-flex justify-content-between align-items-start"> 358 <div class="ms-2 me-auto"> 359 The kwargs : dict accepts {<br> 360 zeroormore : bool (default=False), <br> 361 oneormore : bool (default=False) 362 </div></li> 363 <li class="list-group-item d-flex justify-content-between align-items-start"> 364 <div class="ms-2 me-auto"> 365 The return returns RegexGen 366 </div></li></ol><br> 367 <pre><code><p>regex = RegexGen().alphabets(1,5)</p></code></pre> 368 <p>The regex is displayed as:</p> 369 <pre> regex = "[a-zA-Z]{1,5}"</pre> 370 This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently. 371 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 372 max : int => default = 0 373 pattern : a tuple[str, bool] expected a return type from exclude static function 374 capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data 375 kwargs : dict => { 376 zeroormore : bool => default=False, 377 oneormore : bool => default=False 378 } 379 return : RegexGen 380 ''' 381 characterstr: str = str() 382 temp: str = str() 383 384 try: 385 temp = self.__add_quantifier(min, max, **kwargs) 386 except (...): 387 raise 388 389 if pattern is None: 390 characterstr = f"([a-zA-Z]{temp})" if capture else f"[a-zA-Z]{temp}" 391 elif pattern[1]: 392 characterstr = f"((?!{pattern[0]})a-zA-Z){temp}" if capture else f"(?:(?!{pattern[0]})a-zA-Z){temp}" 393 else: 394 characterstr = f"((?![{pattern[0]}])a-zA-Z){temp}\b" if capture else f"(?:(?![{pattern[0]}])a-zA-Z){temp}" 395 396 self.__regex_data += 
characterstr 397 398 return self 399 400 def get_non_capturing_regex(self) -> str: 401 ''' 402 If the program have capture parameters it will prevent regex engine from capturing index and patterns from the string 403 reducing capturing overhead and hence increase efficiency 404 return : str 405 ''' 406 return f"(?:{self.__regex_data})" 407 408 def get_regex_data(self) -> str: 409 ''' 410 Returns a regex syntax that may capture the text from the input string. 411 return : str 412 ''' 413 return self.__regex_data 414 415 def combine(self, regex: Self) -> Self: 416 ''' 417 <p><code>Combine</code> function creates a regex syntax to combine two regex expressions in one to create a pattern.<br> 418 </p> 419 In the function definition, 420 <code>combine(self, regex: Self)</code> 421 , the function accepts value of two different regex to perform the combination operation. <br> 422 <pre><code><p>regexa = RegexGen().digits(4,4).text(RegexGen.characters('-')) 423 regexb = RegexGen().digits(3,3) 424 regex = RegexGen.combine(regexa, regexb) </p></code></pre> 425 426 <p>The regex is displayed as:</p> 427 <pre> regex = "\d{4,4}-\d{3,3}"</pre> 428 regex : RegexGen //Object that has regex syntax which is addable 429 return : RegexGen 430 ''' 431 if len(regex.__regex_data) == 0: # and regex.__regex_data[0] == '^': 432 raise Exception("Invalid regex to combine") 433 434 self.__regex_data += regex.__regex_data 435 436 return self 437 438 @staticmethod 439 def any_of(characters: str, capture: bool = False, **kwargs) -> str: 440 ''' 441 <p><code>Any of</code> function is any_of_the_block with quantifiers or simply put, this function defines repetition of words in the list.</p> <br> 442 In the function definition, <code>any_of(characters: str, capture: bool = False, **kwargs) -> str:</code>, 443 <ol><li>The min : int and max : int has default = 0. 
If min and max are both zero it must pass a keyword argument as True .</li> 444 <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 445 <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 446 <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 447 <li>The return returns RegexGen.</li></ol> 448 449 ''' 450 if valid_ranges(characters, is_number, is_lower_case, is_upper_case) or characters.find(RegexGen.symbolsrange) != -1: 451 pass 452 453 for character in characters: 454 if not type("a").isascii(character): 455 raise Exception("In function {}, character : {} => Non ascii character is not acceptable".format( 456 RegexGen.any_of.__name__, character)) 457 return f"([{characters}])" if capture else f"(?:[{characters}])" 458 459 @staticmethod 460 def characters(char: str) -> str: 461 ''' 462 <p><code>Characters</code> function is a static function which is unable to create a regex syntax.<br> 463 Instead, a function like Text is used to submit to the regex syntax. <br> 464 Characters is used some characters predefined in the regex library are used and thus they need to be escaped. 
465 <pre><code><p>RegexGen.text(RegexGen.characters("This+is{a$text.") </p></code></pre> 466 467 <p>The regex is displayed as:</p> 468 <samp> regex = "This\+is\{a\$text\."</samp><br> 469 some characters are predefined in the regex library thus they need to be escaped 470 return : str 471 </p> 472 ''' 473 letters: str = str() 474 if not char: 475 raise Exception("In function {}, character : {} => Input cannot be none ".format( 476 RegexGen.character.__name__, char)) 477 478 predefined_symbols: set = { 479 '\\', '.', '(', ')', '*', '{', '}', '^', '+', '?', '[', ']', '$', '|'} 480 481 for lettr in char: 482 if lettr in predefined_symbols: 483 letters += f"\\{lettr}" 484 else: 485 letters += lettr 486 return letters 487 488 def succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self: 489 ''' 490 This function is used to match the pattern succeeded by another pattern.<br> 491 In the function definition, <code>succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>, 492 <ol><li>The min : int and max : int has default = 0. 
If min and max are both zero it must pass a keyword argument as True .</li> 493 <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 494 <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 495 <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 496 <li>The return returns RegexGen.</li></ol> 497 ''' 498 if not preceeding or len(preceeding) != 2: 499 raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format( 500 RegexGen.succeeded_by.__name__)) 501 if not succeeding or len(succeeding) != 2: 502 raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format( 503 RegexGen.succeeded_by.__name__)) 504 505 characterstr: str = str() 506 temp: str = str() 507 508 try: 509 temp = self.__add_quantifier(min, max, **kwargs) 510 except (...): 511 raise 512 513 followblock: str = str() 514 if invert: 515 followblock = f"(?!{succeeding[0]})" if succeeding[1] else f"(?![{succeeding[0]}])" 516 else: 517 followblock = f"(?={succeeding[0]})" if succeeding[1] else f"(?=[{succeeding[0]}])" 518 519 precedingblock: str = f"{preceeding[0]}{temp}" if preceeding[1] else f"[{preceeding[0]}]{temp}" 520 521 if len(self.__regex_data) > len(precedingblock) and \ 522 self.__regex_data.rindex(precedingblock) == len(self.__regex_data)-len(precedingblock)-1: 523 characterstr += followblock 524 self.__regex_data = self.__regex_data[:-1] 525 characterstr += ')' 526 else: 527 characterstr = precedingblock + followblock 528 characterstr = f"({characterstr})" if capture else f"(?:{characterstr})" 529 self.__regex_data += characterstr 530 return self 531 532 def preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self: 533 ''' 
534 This function is used to match pattern that is preceded by another pattern.<br> 535 If the pattern of the succeeded_by and preceeded_by matches the combination is union. <br> 536 In the function definition, <code>preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>, 537 <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li> 538 <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 539 <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 540 <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 541 <li>The return returns RegexGen.</li></ol> 542 ''' 543 if not preceding or len(preceding) != 2: 544 raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format( 545 RegexGen.preceded_by.__name__)) 546 if not succeeding or len(succeeding) != 2: 547 raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format( 548 RegexGen.preceded_by.__name__)) 549 550 characterstr: str = str() 551 temp: str = str() 552 553 try: 554 temp = self.__add_quantifier(min, max, **kwargs) 555 except (...): 556 raise 557 558 preceedingblock: str = str() 559 if invert: 560 preceedingblock = f"(?<!{preceding[0]})" if preceding[1] else f"(?<![{preceding[0]}])" 561 else: 562 preceedingblock = f"(?<={preceding[0]})" if preceding[1] else f"(?<=[{preceding[0]}])" 563 564 followblock: str = f"{succeeding[0]}{temp}" if succeeding[1] else f"[{succeeding[0]}]{temp}" 565 characterstr = preceedingblock + followblock 566 characterstr = f"({characterstr})" if capture else f"(?:{characterstr})" 567 self.__regex_data += characterstr 568 return self 569 
570 # @staticmethod 571 # def any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str: 572 # ''' 573 # This function is any_of_the_block with quantifiers or this function defines repetition of words in the list. <br> 574 # In the function definition, <code>any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str:</code>, 575 # <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li> 576 # <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 577 # <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 578 # <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 579 # <li>The return returns RegexGen.</li></ol> 580 # ''' 581 # character_str = str() 582 # tempstr = str() 583 584 # if not len(characters): 585 # return "" 586 587 # character_pair = list() 588 # for index, listitem in enumerate(characters): 589 # character = listitem.pop("character", None) 590 # min = listitem.pop("min", 0) 591 # max = listitem.pop("max", 0) 592 # if character is None: 593 # raise Exception("In function {}, at index {} doesn't have character pair.".format( 594 # RegexGen.any_of.__name__, index)) 595 # if len(character) == 0: 596 # continue 597 # elif len(character) == 1 or (len(character) == 2 and character in {"\s", "\d", "\w", "\W"}): 598 # pass 599 # elif len(character) == 3 and valid_ranges(character, is_lower_case, is_number, is_upper_case): 600 # pass 601 # else: 602 # raise Exception("In function {}, at index {}, Unknown Character: {}.".format( 603 # RegexGen.any_of.__name__, index, character)) 604 # tempstr = RegexGen.__add_quantifier(min=min, max=max, **listitem) 605 # character_pair.append(character+tempstr) 606 607 # character_str = "|".join(character_pair) 608 # return f"({character_str})" 
if capture else f"(?:{character_str})"
Start of the class
33 def linestartwith(self): 34 ''' 35 <code>linestartwith</code> adds a expression to indicate beginning of the string and is always added on a new line.<br></p> 36 The function definition is: 37 `linestartwith(self)` 38 <p>When this function is called, the function adds the expression '^' if no regex data exists already.</p> 39 <p>If the regex data already exists then the function adds the expression '\\n^' indicating line is changed before signifying beginning of the string. </p> 40 <p><code>Linestartwith</code> function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.<br> 41 </p> 42 ``` 43 regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline() 44 ``` 45 <p>This code checks for text starting with 'foo' and starting with 'bar' after any characters of length of min 1 and max 5 digits.</p> 46 <p>The regex is displayed as:</p> 47 <samp> regex = "^foo.{1,5}bar"</samp></p> 48 ^ character symbolizes the start of the string or line. 49 ''' 50 if not len(self.__regex_data): 51 self.__regex_data += '^' 52 else: 53 self.__regex_data += '\n^' 54 return self
linestartwith adds an expression that marks the beginning of the string; when regex data already exists, the marker is placed on a new line.
linestartwith(self)
When this function is called, the function adds the expression '^' if no regex data exists already.
If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string.
Linestartwith function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.
regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline()
This code checks for a line that starts with 'foo' and ends with 'bar', with between 1 and 5 characters of any kind in between.
The regex is displayed as:
regex = "^foo.{1,5}bar" ^ character symbolizes the start of the string or line.58 def endofline(self): 59 ''' 60 <p><code>endofline</code> adds a expression to indicates end of the string and end of the line.<br> 61 The function definition is: 62 <code> 63 endofline(self) 64 </code> 65 <p>When this function is called, the function adds the expression '$' to the regex data. 66 <p>If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string. 67 <p><code>endofline</code> function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.<br> 68 <pre><code><p>regex = RegexGen().regex.text('abc', 1, 1).endofline()</p></code></pre> 69 <p>This code checks for text ending with 'abc' after any characters of any length 70 <p>The regex is displayed as:<br> 71 <samp> regex = "abc$"</samp><br> 72 $ character symbolizes as end of a line 73 </p></p></p></p></p> 74 ''' 75 self.__regex_data += '$' 76 return self
endofline adds an expression that indicates the end of the string and the end of the line.
The function definition is:
endofline(self)
When this function is called, the function adds the expression '$' to the regex data.
The '$' is appended whether or not regex data already exists; unlike linestartwith, no line break is inserted before it.
endofline function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.
regex = RegexGen().regex.text('abc', 1, 1).endofline()
This code checks for text ending with 'abc' after any characters of any length
The regex is displayed as:
regex = "abc$"
$ character symbolizes as end of a line
78 @staticmethod 79 def range(start: str, end: str) -> str: 80 ''' 81 <code>Range</code> function provides syntax for defining range.<br><br> 82 In the function definition, 83 <code>range(start: str, end: str)</code> <br> 84 1. start accepts string for starting range whose length must be 1. 85 2. end accepts string for ending range whose length must be 1. 86 3. return returns the range in format <start>-<end> 87 88 <pre><code><p>regex = RegexGen().regex.range('a', 'z')</p></code></pre> 89 <p>The regex is displayed as:</p> 90 <samp> regex = "[a-z]"</samp><br> 91 Range for symbols will throw an error 92 start : str // length must be 1 93 end : str //length must be 1 94 return : str //returns the range in format <start>-<end> 95 ''' 96 if (not start and not end) and (len(start) > 1 and len(end) > 1): 97 raise Exception("In function {}, range_start : {}, range_end:{} => Characters cannot be None".format( 98 RegexGen.range.__name__, start, end)) 99 100 # check if range is valid 101 character_range = f"({start}-{end})" 102 if (valid_ranges(character_range, is_lower_case, is_upper_case, is_number)): 103 return character_range 104 raise Exception("In function {}, range_start : {}, range_end:{} => This is not a valid range. Valid ranges are 0-9,A-Z or a-z or \W".format( 105 RegexGen.range.__name__, start, end))
Range function provides syntax for defining range.
In the function definition,
range(start: str, end: str)
1. start accepts string for starting range whose length must be 1.
2. end accepts string for ending range whose length must be 1.
3. return returns the range in the format <start>-<end>
regex = RegexGen().regex.range('a', 'z')
The regex is displayed as:
regex = "[a-z]"
Range for symbols will throw an error
start : str // length must be 1
end : str //length must be 1
return : str //returns the range in the format <start>-<end>
108 @staticmethod 109 def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]: 110 ''' 111 <code>Exclude</code> function is a static function. It excludes certain pattern based upon the input of the user.<br> 112 In the function definition, <code>exclude(characters: str, pattern_prevent: bool = False) -> tuple</code>, 113 <ol><li> the characters : str signifies characters to be matched,</li> 114 <li>pattern_prevent : str (default = False) Here, on True, prevents the characters sequence to match(The sequence must not contain a range) 115 and on false prevent piecewise occuring of characters. 116 and returns a tuple </li> 117 118 119 <pre><code><p>RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True) </p></code></pre> 120 121 <p>The regex is displayed as:<br> 122 <samp> regex = "\b(?:(?![23])\d)+\b"</samp><br> 123 characters : str //characters to be matched 124 pattern_prevent : bool => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range) 125 //and on false prevent piecewise occuring of characters. 126 return : tuple 127 ''' 128 if not characters: 129 raise Exception("In function {}, Character : {} => Characters cannot be None".format( 130 RegexGen.exclude.__name__, characters)) 131 # check if charaters is a range 132 try: 133 if valid_ranges(characters, is_lower_case, is_upper_case, is_number) or characters.find(RegexGen.symbolsrange) != -1: 134 pattern_prevent = False 135 except (...): 136 raise 137 138 return characters, pattern_prevent
Exclude function is a static function. It excludes certain pattern based upon the input of the user.
In the function definition, exclude(characters: str, pattern_prevent: bool = False) -> tuple,
- the characters : str signifies characters to be matched,
- pattern_prevent : bool (default = False). On True, it prevents the character sequence from matching (the sequence must not contain a range); on False, it prevents piecewise occurrence of the characters. The function returns a tuple.
RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True)
The regex is displayed as:
regex = "(?:(?![23])\d)+"
characters : str //characters to be matched
pattern_prevent : bool => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range)
//and on false prevent piecewise occuring of characters.
return : tuple
140 @staticmethod 141 def boundary_character(character: str, start: bool = True) -> str: 142 ''' 143 <code>Boundary character</code> gives left or right boundary of the word as required.<br> 144 In the function definition, <code> boundary_character(character: str, start: bool = True) -> str: </code><br> 145 <ol> 146 <li> character: str signifies characters to be matched</li> 147 <li>start : bool (default = True) On true, the letter is the left boundary of the word<br> 148 and on false the letter is the right boundary of the word. 149 ''' 150 if len(character) > 2: 151 raise Exception("In function {}, start : {} => Character cannot be length greater than two", 152 RegexGen.boundary_character.__name__, start) 153 elif len(character) == 2 and character not in {"\w", "\W", "\d", "\."}: 154 raise Exception("In function {}, start : {} => Character is not a \w or \W or \d or \.", 155 RegexGen.boundary_character.__name__, start) 156 157 character_str = "\b" + character if start else character + "\b" 158 159 return character_str
Boundary character gives left or right boundary of the word as required.
In the function definition, boundary_character(character: str, start: bool = True) -> str:
- character: str signifies characters to be matched
- start : bool (default = True) On true, the letter is the left boundary of the word
and on false the letter is the right boundary of the word.
def add_quantifier(self, min: int, max: int, **kwargs) -> str:
    """
    Translate a (min, max) repetition request into a regex quantifier.

    min    : int  // minimum number of repetitions, must not be negative
    max    : int  // maximum number of repetitions, 0 means "no upper bound"
    kwargs : dict => {
        zeroormore : bool => default=False,  // only read when min == max == 0
        oneormore  : bool => default=False   // only read when min == max == 0
    }
    return : str

    Result by case:
        min == max == 0, zeroormore  -> "*"
        min == max == 0, oneormore   -> "+"
        min == max == 0, neither     -> Exception
        min == max == 1              -> ""      (no quantifier needed)
        min == max                   -> "{min}"
        max == 0 and min > 0         -> "{min,}"
        min == 0 and max == 1        -> "?"
        otherwise (min < max)        -> "{min,max}"

    Raises ValueError for a negative bound or when max is non-zero and
    smaller than min.
    """
    if min < 0 or max < 0:
        raise ValueError("In function {} => Min And Max Cannot be Negative"
                         .format("__add_quantifier"))

    if min == 0 and max == 0:
        if kwargs.get("zeroormore", False):
            return '*'
        if kwargs.get("oneormore", False):
            return '+'
        raise Exception("In function {} => Min And Max Cannot be Zero"
                        .format("__add_quantifier"))

    if min == max:
        # exactly one repetition is what an unquantified token already means
        return "" if min == 1 else f"{{{min}}}"

    if max == 0:
        return f"{{{min},}}"

    if max < min:
        # previously fell through and silently produced "{,max}"
        raise ValueError("In function {} => Max Cannot be Less Than Min"
                         .format("__add_quantifier"))

    if min == 0 and max == 1:
        return "?"

    # min is written out even when it is 0 ("{0,12}") so the quantifier is
    # spelled the same way for every lower bound
    return f"{{{min},{max}}}"


# The other builder methods call this helper as self.__add_quantifier;
# expose it under that private name as well so both spellings resolve.
__add_quantifier = add_quantifier
__add_quantifier adds quantifiers like ?, +, * and {n,m}.
The function definition is: __add_quantifier(self, min: int, max: int, **kwargs).
The regex generated depends on the value of min and max.
- min == max and max == 0:
If no characters exist then the exception is raised stating min and max can't be zero.
regex = " "
- max == min and min == 1:
regex = "^foo.{1,5}bar" - max == min:
regex = "^foo.{1,5}bar" - min == 0 and max == 1:
regex = "^foo.{1,5}bar" - max > min and min > 0:
regex = "^foo.{1,5}bar" - Else:
If no characters exist then the exception is raised stating min and max can't be zero.
- max == 0 and min > 0:
regex = "^foo.{1,5}bar" regex = "^foo.{1,5}bar"
Add Quantifiers like {0},{0,1},?,*,+,{0,1}
214 def text(self, character: str, min: int = 0, max: int = 0, **kwargs) -> Self: 215 ''' 216 <p><code>Text</code> function simply adds the input to regex syntax.</p> 217 <pre><code><p>RegexGen.text("This is a text.") </p></code></pre> 218 219 <p>The regex is displayed as:</p> 220 <pre> regex = "This is a text."</pre> 221 Text is generated using Characters function. 222 character : str // A character can be a word, alphabet, a digit or number and symbols or a range 223 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 224 max : int => default = 0 225 capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data 226 kwargs : dict => { 227 zeroormore : bool => default=False, 228 oneormore : bool => default=False 229 } 230 return : RegexGen 231 ''' 232 letterstr: str = str() 233 temp: str = str() 234 235 if not character: 236 raise Exception("In function {}, Character : {} => Character cannot be None".format( 237 self.text.__name__, character)) 238 239 letterstr = character 240 241 try: 242 temp = self.__add_quantifier(min, max, **kwargs) 243 except Exception as e: 244 raise 245 246 self.__regex_data += letterstr 247 self.__regex_data += temp 248 249 return self
Text function simply adds the input to regex syntax.
RegexGen.text("This is a text.")
The regex is displayed as:
regex = "This is a text."
Text is generated using Characters function. character : str // A character can be a word, alphabet, a digit or number and symbols or a range min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen
251 def any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self: 252 ''' 253 <p><code>Any</code> function generates a regex which can be utilized to check if a certain character exists in the expression<br> 254 In the function definition, 255 <code>any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:</code> 256 , 257 <ol class="list-group list-group-numbered"> 258 <li class="list-group-item d-flex justify-content-between align-items-start"> 259 <div class="ms-2 me-auto"> 260 the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br> 261 </div></li> 262 <li class="list-group-item d-flex justify-content-between align-items-start"> 263 <div class="ms-2 me-auto"> 264 If on capture : bool (default=False) True is passed, it enclose . in parenthesis so that regex engine capture data. <br> 265 </div></li> 266 <li class="list-group-item d-flex justify-content-between align-items-start"> 267 <div class="ms-2 me-auto"> 268 The kwargs : dict accepts {<br> 269 zeroormore : bool (default=False), <br> 270 oneormore : bool (default=False) 271 </div></li> 272 <li class="list-group-item d-flex justify-content-between align-items-start"> 273 <div class="ms-2 me-auto"> 274 The return returns RegexGen 275 </div></li></ol><br></p> 276 <pre><code>regex = RegexGen() 277 regex = regex.any(min=0, max=12)</code></pre> 278 279 <p>The regex is displayed as:</p> 280 <pre> regex = ".{0,12}"</pre> 281 . character symbolizes any character 282 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 283 max : int => default = 0 284 capture : bool => default=False //On True enclose . 
in parenthesis so that regex engine capture data 285 kwargs : dict => { 286 zeroormore : bool => default=False, 287 oneormore : bool => default=False 288 } 289 return : RegexGen 290 ''' 291 anystr: str = str() 292 temp: str = str() 293 294 try: 295 temp = self.__add_quantifier(min, max, **kwargs) 296 except (...): 297 raise 298 299 anystr = f"(.{temp})" if capture else f".{temp}" 300 self.__regex_data += anystr 301 302 return self
Any function generates a regex which can be utilized to check if a certain character exists in the expression
In the function definition,
any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:
,
-
the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0).
-
If on capture : bool (default=False) True is passed, it enclose . in parenthesis so that regex engine capture data.
-
The kwargs : dict accepts {
zeroormore : bool (default=False),
oneormore : bool (default=False) -
The return returns RegexGen
regex = RegexGen()
regex = regex.any(min=0, max=12)
The regex is displayed as:
regex = ".{0,12}"
. character symbolizes any character min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 capture : bool => default=False //On True enclose . in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen
305 def digits(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self: 306 ''' 307 This function is used to match only numbers that may not contain a sequence of number or the each numbers existing independently. 308 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 309 max : int => default = 0 310 pattern : a tuple[str, bool] expected a return type from exclude static function 311 capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data 312 kwargs : dict => { 313 zeroormore : bool => default=False, 314 oneormore : bool => default=False 315 } 316 return : RegexGen 317 ''' 318 digitstr: str = str() 319 temp: str = str() 320 321 try: 322 temp = self.__add_quantifier(min, max, **kwargs) 323 except (...): 324 raise 325 326 if pattern is None: 327 digitstr = f"(\d{temp})" if capture else f"\d{temp}" 328 elif pattern[1]: 329 digitstr = f"((?!{pattern[0]})\d){temp}" if capture else f"(?:(?!{pattern[0]})\d){temp}" 330 else: 331 digitstr = f"((?![{pattern[0]}])\d){temp}" if capture else f"(?:(?![{pattern[0]}])\d){temp}" 332 333 self.__regex_data += digitstr 334 335 return self
This function is used to match only numbers that may not contain a sequence of number or the each numbers existing independently. min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 pattern : a tuple[str, bool] expected a return type from exclude static function capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen
337 def alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self: 338 ''' 339 <p><code>Alphabets</code> function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.<br> 340 </p> 341 In the function definition, 342 <code>alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs)</code> 343 , <br><br> 344 <ol class="list-group list-group-numbered"> 345 <li class="list-group-item d-flex justify-content-between align-items-start"> 346 <div class="ms-2 me-auto"> 347 the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br> 348 </div></li> 349 <li class="list-group-item d-flex justify-content-between align-items-start"> 350 <div class="ms-2 me-auto"> 351 In pattern, a tuple[str, bool] is expected as a return type from exclude static function. 
<br> 352 </div></li> 353 <li class="list-group-item d-flex justify-content-between align-items-start"> 354 <div class="ms-2 me-auto"> 355 If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that regex engine captures data.<br> 356 </div></li> 357 <li class="list-group-item d-flex justify-content-between align-items-start"> 358 <div class="ms-2 me-auto"> 359 The kwargs : dict accepts {<br> 360 zeroormore : bool (default=False), <br> 361 oneormore : bool (default=False) 362 </div></li> 363 <li class="list-group-item d-flex justify-content-between align-items-start"> 364 <div class="ms-2 me-auto"> 365 The return returns RegexGen 366 </div></li></ol><br> 367 <pre><code><p>regex = RegexGen().alphabets(1,5)</p></code></pre> 368 <p>The regex is displayed as:</p> 369 <pre> regex = "[a-zA-Z]{1,5}"</pre> 370 This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently. 371 min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 372 max : int => default = 0 373 pattern : a tuple[str, bool] expected a return type from exclude static function 374 capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data 375 kwargs : dict => { 376 zeroormore : bool => default=False, 377 oneormore : bool => default=False 378 } 379 return : RegexGen 380 ''' 381 characterstr: str = str() 382 temp: str = str() 383 384 try: 385 temp = self.__add_quantifier(min, max, **kwargs) 386 except (...): 387 raise 388 389 if pattern is None: 390 characterstr = f"([a-zA-Z]{temp})" if capture else f"[a-zA-Z]{temp}" 391 elif pattern[1]: 392 characterstr = f"((?!{pattern[0]})a-zA-Z){temp}" if capture else f"(?:(?!{pattern[0]})a-zA-Z){temp}" 393 else: 394 characterstr = f"((?![{pattern[0]}])a-zA-Z){temp}\b" if capture else f"(?:(?![{pattern[0]}])a-zA-Z){temp}" 395 396 self.__regex_data += 
characterstr 397 398 return self
Alphabets function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.
In the function definition,
alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs)
,
-
the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0).
-
In pattern, a tuple[str, bool] is expected as a return type from exclude static function.
-
If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that regex engine captures data.
-
The kwargs : dict accepts {
zeroormore : bool (default=False),
oneormore : bool (default=False)
-
The return returns RegexGen
regex = RegexGen().alphabets(1,5)
The regex is displayed as:
regex = "[a-zA-Z]{1,5}"
This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently. min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 pattern : a tuple[str, bool] expected a return type from exclude static function capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen
400 def get_non_capturing_regex(self) -> str: 401 ''' 402 If the program have capture parameters it will prevent regex engine from capturing index and patterns from the string 403 reducing capturing overhead and hence increase efficiency 404 return : str 405 ''' 406 return f"(?:{self.__regex_data})"
If the program have capture parameters it will prevent regex engine from capturing index and patterns from the string reducing capturing overhead and hence increase efficiency return : str
408 def get_regex_data(self) -> str: 409 ''' 410 Returns a regex syntax that may capture the text from the input string. 411 return : str 412 ''' 413 return self.__regex_data
Returns a regex syntax that may capture the text from the input string. return : str
415 def combine(self, regex: Self) -> Self: 416 ''' 417 <p><code>Combine</code> function creates a regex syntax to combine two regex expressions in one to create a pattern.<br> 418 </p> 419 In the function definition, 420 <code>combine(self, regex: Self)</code> 421 , the function accepts value of two different regex to perform the combination operation. <br> 422 <pre><code><p>regexa = RegexGen().digits(4,4).text(RegexGen.characters('-')) 423 regexb = RegexGen().digits(3,3) 424 regex = RegexGen.combine(regexa, regexb) </p></code></pre> 425 426 <p>The regex is displayed as:</p> 427 <pre> regex = "\d{4,4}-\d{3,3}"</pre> 428 regex : RegexGen //Object that has regex syntax which is addable 429 return : RegexGen 430 ''' 431 if len(regex.__regex_data) == 0: # and regex.__regex_data[0] == '^': 432 raise Exception("Invalid regex to combine") 433 434 self.__regex_data += regex.__regex_data 435 436 return self
Combine function creates a regex syntax to combine two regex expressions in one to create a pattern.
In the function definition,
combine(self, regex: Self)
, the function accepts value of two different regex to perform the combination operation.
regexa = RegexGen().digits(4,4).text(RegexGen.characters('-'))
regexb = RegexGen().digits(3,3)
regex = RegexGen.combine(regexa, regexb)
The regex is displayed as:
regex = "\d{4,4}-\d{3,3}"
regex : RegexGen //Object that has regex syntax which is addable return : RegexGen
@staticmethod
def any_of(characters: str, capture: bool = False, **kwargs) -> str:
    '''
    Place the given characters in a character class and return it as a group.

    characters : str  // characters put between square brackets; every one
                      // of them must be ASCII
    capture    : bool // default = False. True returns a capturing group
                      // "([...])", False a non-capturing group "(?:[...])"
    kwargs     : dict // accepted but not read by this function
    return     : str

    Raises Exception naming the first non-ASCII character found.
    '''
    # NOTE(review): the outcome of this range/symbol check is never acted
    # on. It is still evaluated so that whatever valid_ranges (defined
    # elsewhere) does on this input keeps happening - confirm the intent.
    _range_or_symbol = (
        valid_ranges(characters, is_number, is_lower_case, is_upper_case)
        or RegexGen.symbolsrange in characters
    )

    offender = next((ch for ch in characters if not ch.isascii()), None)
    if offender is not None:
        raise Exception("In function {}, character : {} => Non ascii character is not acceptable".format(
            RegexGen.any_of.__name__, offender))

    class_body = f"[{characters}]"
    return f"({class_body})" if capture else f"(?:{class_body})"
Any of function is any_of_the_block with quantifiers or simply put, this function defines repetition of words in the list.
In the function definition,
any_of(characters: str, capture: bool = False, **kwargs) -> str:,
- The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .
- In pattern, a tuple[str, bool] is expected as a return type from exclude static function.
- If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.
- The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).
- The return returns RegexGen.
459 @staticmethod 460 def characters(char: str) -> str: 461 ''' 462 <p><code>Characters</code> function is a static function which is unable to create a regex syntax.<br> 463 Instead, a function like Text is used to submit to the regex syntax. <br> 464 Characters is used some characters predefined in the regex library are used and thus they need to be escaped. 465 <pre><code><p>RegexGen.text(RegexGen.characters("This+is{a$text.") </p></code></pre> 466 467 <p>The regex is displayed as:</p> 468 <samp> regex = "This\+is\{a\$text\."</samp><br> 469 some characters are predefined in the regex library thus they need to be escaped 470 return : str 471 </p> 472 ''' 473 letters: str = str() 474 if not char: 475 raise Exception("In function {}, character : {} => Input cannot be none ".format( 476 RegexGen.character.__name__, char)) 477 478 predefined_symbols: set = { 479 '\\', '.', '(', ')', '*', '{', '}', '^', '+', '?', '[', ']', '$', '|'} 480 481 for lettr in char: 482 if lettr in predefined_symbols: 483 letters += f"\\{lettr}" 484 else: 485 letters += lettr 486 return letters
Characters function is a static function which is unable to create a regex syntax.
Instead, a function like Text is used to submit to the regex syntax.
Characters is used because some characters are predefined in the regex library and therefore need to be escaped.
RegexGen.text(RegexGen.characters("This+is{a$text.")
The regex is displayed as:
regex = "This\+is\{a\$text\."some characters are predefined in the regex library thus they need to be escaped return : str
488 def succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self: 489 ''' 490 This function is used to match the pattern succeeded by another pattern.<br> 491 In the function definition, <code>succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>, 492 <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li> 493 <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 494 <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 495 <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 496 <li>The return returns RegexGen.</li></ol> 497 ''' 498 if not preceeding or len(preceeding) != 2: 499 raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format( 500 RegexGen.succeeded_by.__name__)) 501 if not succeeding or len(succeeding) != 2: 502 raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format( 503 RegexGen.succeeded_by.__name__)) 504 505 characterstr: str = str() 506 temp: str = str() 507 508 try: 509 temp = self.__add_quantifier(min, max, **kwargs) 510 except (...): 511 raise 512 513 followblock: str = str() 514 if invert: 515 followblock = f"(?!{succeeding[0]})" if succeeding[1] else f"(?![{succeeding[0]}])" 516 else: 517 followblock = f"(?={succeeding[0]})" if succeeding[1] else f"(?=[{succeeding[0]}])" 518 519 precedingblock: str = f"{preceeding[0]}{temp}" if preceeding[1] else f"[{preceeding[0]}]{temp}" 520 521 if len(self.__regex_data) > len(precedingblock) and \ 522 
self.__regex_data.rindex(precedingblock) == len(self.__regex_data)-len(precedingblock)-1: 523 characterstr += followblock 524 self.__regex_data = self.__regex_data[:-1] 525 characterstr += ')' 526 else: 527 characterstr = precedingblock + followblock 528 characterstr = f"({characterstr})" if capture else f"(?:{characterstr})" 529 self.__regex_data += characterstr 530 return self
This function is used to match the pattern succeeded by another pattern.
In the function definition, succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:,
- min : int and max : int both default to 0. If min and max are both zero, a keyword argument must be passed as True.
- Each pattern is expected to be a tuple[str, bool], as returned by the exclude static function.
- If capture : bool (default=False) is True, the regex syntax is enclosed in parentheses so that the regex engine captures the data.
- kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False) }.
- The function returns RegexGen.
532 def preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self: 533 ''' 534 This function is used to match pattern that is preceded by another pattern.<br> 535 If the pattern of the succeeded_by and preceeded_by matches the combination is union. <br> 536 In the function definition, <code>preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>, 537 <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li> 538 <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li> 539 <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li> 540 <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li> 541 <li>The return returns RegexGen.</li></ol> 542 ''' 543 if not preceding or len(preceding) != 2: 544 raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format( 545 RegexGen.preceded_by.__name__)) 546 if not succeeding or len(succeeding) != 2: 547 raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format( 548 RegexGen.preceded_by.__name__)) 549 550 characterstr: str = str() 551 temp: str = str() 552 553 try: 554 temp = self.__add_quantifier(min, max, **kwargs) 555 except (...): 556 raise 557 558 preceedingblock: str = str() 559 if invert: 560 preceedingblock = f"(?<!{preceding[0]})" if preceding[1] else f"(?<![{preceding[0]}])" 561 else: 562 preceedingblock = f"(?<={preceding[0]})" if preceding[1] else f"(?<=[{preceding[0]}])" 563 564 followblock: str = f"{succeeding[0]}{temp}" if succeeding[1] else f"[{succeeding[0]}]{temp}" 
565 characterstr = preceedingblock + followblock 566 characterstr = f"({characterstr})" if capture else f"(?:{characterstr})" 567 self.__regex_data += characterstr 568 return self
This function is used to match pattern that is preceded by another pattern.
If the patterns of succeeded_by and preceded_by match, the two are combined (union).
In the function definition, preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:,
- min : int and max : int both default to 0. If min and max are both zero, a keyword argument must be passed as True.
- Each pattern is expected to be a tuple[str, bool], as returned by the exclude static function.
- If capture : bool (default=False) is True, the regex syntax is enclosed in parentheses so that the regex engine captures the data.
- kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False) }.
- The function returns RegexGen.