regex

View Source

  1from typing_extensions import Self
  2from typing import Tuple
  3import re
  4
  5"""
  6Docs for RegexGenerator
  7"""
  8
  9class RegexGen:
 10    """
 11    Start of the class
 12    """
 13    # ranges
 14    lowercaserange: str = "[a-z]"
 15    uppercaserange: str = "[A-Z]"
 16    digitsrange: str = "[0-9]"
 17    symbolsrange: str = "\W"
 18    alphanumeric: str = "\w"
 19    # ecape sequences
 20    block_word: str = "\b"
 21    nonblock_word: str = "\B"
 22    new_line: str = "\n"
 23    tab_space: str = "\t"
 24    carriage_return: str = "\r"
 25    whitespace = "\s"
 26
 27    def __init__(self):
 28        self.__regex_data: str = str()
 29
 30
 31
 32    def linestartwith(self):
 33        '''
 34        <code>linestartwith</code> adds a expression to indicate beginning of the string and is always added on a new line.<br></p>
 35        The function definition is:  
 36            `linestartwith(self)`
 37        <p>When this function is called, the function adds the expression '^' if no regex data exists already.</p>
 38        <p>If the regex data already exists then the function adds the expression '\\n^' indicating line is changed before signifying beginning of the string.  </p>
 39        <p><code>Linestartwith</code> function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.<br>
 40        </p>
 41        ```
 42        regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline()
 43        ```       
 44        <p>This code checks for text starting with 'foo' and starting with 'bar' after any characters of length of min 1 and max 5 digits.</p>
 45        <p>The regex is displayed as:</p>
 46        <samp> regex = "^foo.{1,5}bar"</samp></p>
 47    ^ character symbolizes the start of the string or line.
 48    '''
 49        if not len(self.__regex_data):
 50            self.__regex_data += '^'
 51        else:
 52            self.__regex_data += '\n^'
 53        return self
 54
 55
 56
 57    def endofline(self):
 58        '''
 59        <p><code>endofline</code> adds a expression to indicates end of the string and end of the line.<br>
 60            The function definition is: 
 61          <code>
 62           endofline(self) 
 63           </code>
 64        <p>When this function is called, the function adds the expression '$' to the regex data.
 65        <p>If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string.  
 66        <p><code>endofline</code> function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.<br>
 67        <pre><code><p>regex = RegexGen().regex.text('abc', 1, 1).endofline()</p></code></pre>           
 68        <p>This code checks for text ending with 'abc' after any characters of any length 
 69        <p>The regex is displayed as:<br>
 70        <samp> regex = "abc$"</samp><br>
 71        $ character symbolizes as end of a line
 72        </p></p></p></p></p>
 73        '''
 74        self.__regex_data += '$'
 75        return self
 76    
 77    @staticmethod
 78    def range(start: str, end: str) -> str:
 79        '''
 80        <code>Range</code> function provides syntax for defining range.<br><br>
 81            In the function definition, 
 82            <code>range(start: str, end: str)</code> <br>
 83                    1. start accepts string for starting range whose length must be 1.                           
 84                    2. end accepts string for ending range whose length must be 1.                             
 85                    3. return returns the range in format <start>-<end>
 86        
 87        <pre><code><p>regex = RegexGen().regex.range('a', 'z')</p></code></pre>           
 88        <p>The regex is displayed as:</p>
 89        <samp> regex = "[a-z]"</samp><br>
 90        Range for symbols will throw an error
 91        start : str // length must be 1
 92        end : str  //length must be 1
 93        return : str //returns the range in format <start>-<end>
 94    '''
 95        if (not start and not end) and (len(start) > 1 and len(end) > 1):
 96            raise Exception("In function {}, range_start : {}, range_end:{} => Characters cannot be None".format(
 97                RegexGen.range.__name__, start, end))
 98
 99        # check if range is valid
100        character_range = f"({start}-{end})"
101        if (valid_ranges(character_range, is_lower_case, is_upper_case, is_number)):
102            return character_range
103        raise Exception("In function {}, range_start : {}, range_end:{} => This is not a valid range. Valid ranges are 0-9,A-Z or a-z or \W".format(
104            RegexGen.range.__name__, start, end))
105
106
107    @staticmethod
108    def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]:
109        '''
110        <code>Exclude</code> function is a static function. It excludes certain pattern based upon the input of the user.<br>
111        In the function definition, <code>exclude(characters: str, pattern_prevent: bool = False) -> tuple</code>,
112            <ol><li> the characters : str signifies characters to be matched,</li> 
113            <li>pattern_prevent : str (default = False) Here, on True, prevents the characters sequence to match(The sequence must not contain a range) 
114                    and on false prevent piecewise occuring of characters.
115                    and returns a tuple   </li>                 
116                
117            
118    <pre><code><p>RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True) </p></code></pre>           
119    
120    <p>The regex is displayed as:<br>
121    <samp> regex = "\b(?:(?![23])\d)+\b"</samp><br>
122        characters : str  //characters to be matched
123        pattern_prevent : bool  => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range) 
124                                                //and on false prevent piecewise occuring of characters.
125        return : tuple
126    '''
127        if not characters:
128            raise Exception("In function {}, Character : {} => Characters cannot be None".format(
129                RegexGen.exclude.__name__, characters))
130        # check if charaters is a range
131        try:
132            if valid_ranges(characters, is_lower_case, is_upper_case, is_number) or characters.find(RegexGen.symbolsrange) != -1:
133                pattern_prevent = False
134        except (...):
135            raise
136
137        return characters, pattern_prevent
138
139    @staticmethod
140    def boundary_character(character: str, start: bool = True) -> str:
141        '''
142        <code>Boundary character</code> gives left or right boundary of the word as required.<br>
143        In the function definition, <code> boundary_character(character: str, start: bool = True) -> str: </code><br>
144        <ol>
145        <li> character: str signifies characters to be matched</li>
146        <li>start : bool (default = True) On true, the letter is the left boundary of the word<br> 
147                                        and on false the letter is the right boundary of the word.
148    '''
149        if len(character) > 2:
150            raise Exception("In function {}, start : {} => Character cannot be length greater than two",
151                            RegexGen.boundary_character.__name__, start)
152        elif len(character) == 2 and character not in {"\w", "\W", "\d", "\."}:
153            raise Exception("In function {}, start : {} => Character is not a \w or \W or \d or \.",
154                            RegexGen.boundary_character.__name__, start)
155
156        character_str = "\b" + character if start else character + "\b"
157        
158        return character_str
159
160    
161    def add_quantifier(self, min: int, max: int, **kwargs) -> str:
162        """
163            <code>__add_quantifier</code>adds quantifiers like ? + * x(n,m).<br>
164        The function definition is:  <code>__add_quantifier(self, min: int, max: int, **kwargs)</code>.<br>
165        The regex generated depends on the value of min and max.
166            <ol>
167                <li>min == max and max == 0:<br>
168                    If no characters exist then the exception is raised stating min and max can't be zero.       
169                    <pre> regex = " "</pre>         </li>      
170                 <li>max == min and min == 1:
171                    <pre> regex = "^foo.{1,5}bar"</pre></li>
172                <li>max == min:
173                    <pre> regex = "^foo.{1,5}bar"</pre></li>                
174                <li>min == 0 and max == 1:
175                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
176                <>max == 0 and min > 0:
177                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
178                <li>max > min and min > 0:
179                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
180                <li>Else:  
181                    If no characters exist then the exception is raised stating min and max can't be zero.</li>                      
182                </ol>
183        <pre> regex = "^foo.{1,5}bar"</pre>
184        Add Quantifiers like {0},{0,1},?,*,+,{0,1}
185    """
186        regexchar: str = str()
187
188        if min == max and max == 0:
189            zeroormore = kwargs.get("zeroormore", False)
190            oneormore = kwargs.get("oneormore", False)
191            if zeroormore:
192                regexchar += '*'
193            elif oneormore:
194                regexchar += '+'
195            else:
196                raise Exception("In function {} => Min And Max Cannot be Zero"
197                                .format(self.__add_quantifier.__name__))
198        elif max == min and min == 1:
199            regexchar = ""
200        elif max == min:
201            regexchar = f"{{{min}}}"
202        elif min == 0 and max == 1:
203            regexchar = "?"
204        elif max == 0 and min > 0:
205            regexchar = f"{{{min},}}"
206        elif max > min and min > 0:
207            regexchar = f"{{{min},{max}}}"
208        else:
209            regexchar = f"{{,{max}}}"
210
211        return regexchar
212
213    def text(self, character: str, min: int = 0, max: int = 0, **kwargs) -> Self:
214        ''' 
215        <p><code>Text</code> function simply adds the input to regex syntax.</p>
216        <pre><code><p>RegexGen.text("This is a text.") </p></code></pre>           
217        
218        <p>The regex is displayed as:</p>
219        <pre> regex = "This is a text."</pre>
220        Text is generated using Characters function.
221        character : str // A character can be a word, alphabet, a digit or number and symbols or a range
222        min : int => default = 0  // if min and max are both zero it must pass a keyword argument as True 
223        max : int  => default = 0
224        capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data
225        kwargs : dict => {
226            zeroormore : bool => default=False,
227            oneormore : bool => default=False
228        }
229        return : RegexGen
230    '''
231        letterstr: str = str()
232        temp: str = str()
233
234        if not character:
235            raise Exception("In function {}, Character : {} => Character cannot be None".format(
236                self.text.__name__, character))
237
238        letterstr = character
239
240        try:
241            temp = self.__add_quantifier(min, max, **kwargs)
242        except Exception as e:
243            raise
244
245        self.__regex_data += letterstr
246        self.__regex_data += temp
247
248        return self
249
250    def any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:
251        ''' 
252        <p><code>Any</code> function generates a regex which can be utilized to check if a certain character exists in the expression<br>
253            In the function definition, 
254            <code>any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:</code>
255            ,
256            <ol class="list-group list-group-numbered">
257                <li class="list-group-item d-flex justify-content-between align-items-start">
258                    <div class="ms-2 me-auto">
259                        the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br>
260                    </div></li>
261                <li class="list-group-item d-flex justify-content-between align-items-start">
262                    <div class="ms-2 me-auto">
263                        If on capture : bool (default=False) True is passed, it  enclose . in parenthesis so that regex engine capture data. <br>
264                    </div></li>
265                <li class="list-group-item d-flex justify-content-between align-items-start">
266                    <div class="ms-2 me-auto">
267                        The kwargs : dict accepts {<br>
268                            zeroormore : bool  (default=False), <br>
269                            oneormore : bool  (default=False)            
270                    </div></li>
271                    <li class="list-group-item d-flex justify-content-between align-items-start">
272                        <div class="ms-2 me-auto">
273                            The return returns RegexGen
274                        </div></li></ol><br></p>
275        <pre><code>regex = RegexGen()
276            regex = regex.any(min=0, max=12)</code></pre>           
277        
278        <p>The regex is displayed as:</p>
279        <pre>  regex = ".{0,12}"</pre> 
280        . character symbolizes any character
281        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
282        max : int => default = 0
283        capture : bool => default=False //On True enclose . in parenthesis so that regex engine capture data
284        kwargs : dict => {
285            zeroormore : bool => default=False,
286            oneormore : bool => default=False
287        }
288        return : RegexGen
289    '''
290        anystr: str = str()
291        temp: str = str()
292
293        try:
294            temp = self.__add_quantifier(min, max, **kwargs)
295        except (...):
296            raise
297
298        anystr = f"(.{temp})" if capture else f".{temp}"
299        self.__regex_data += anystr
300
301        return self
302
303
304    def digits(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self:
305        '''
306        This function is used to match only numbers that may not contain a sequence of number or the each numbers existing independently.
307        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
308        max : int => default = 0
309        pattern : a tuple[str, bool] expected a return type from exclude static function
310        capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data
311        kwargs : dict => {
312            zeroormore : bool => default=False,
313            oneormore : bool => default=False
314        }
315        return : RegexGen
316    '''
317        digitstr: str = str()
318        temp: str = str()
319
320        try:
321            temp = self.__add_quantifier(min, max, **kwargs)
322        except (...):
323            raise
324
325        if pattern is None:
326            digitstr = f"(\d{temp})" if capture else f"\d{temp}"
327        elif pattern[1]:
328            digitstr = f"((?!{pattern[0]})\d){temp}" if capture else f"(?:(?!{pattern[0]})\d){temp}"
329        else:
330            digitstr = f"((?![{pattern[0]}])\d){temp}" if capture else f"(?:(?![{pattern[0]}])\d){temp}"
331
332        self.__regex_data += digitstr
333
334        return self
335
336    def alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self:
337        '''
338        <p><code>Alphabets</code> function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.<br>
339                </p>
340        In the function definition, 
341        <code>alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs)</code>
342        , <br><br>
343        <ol class="list-group list-group-numbered">
344        <li class="list-group-item d-flex justify-content-between align-items-start">
345            <div class="ms-2 me-auto">
346                the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br>
347            </div></li>
348        <li class="list-group-item d-flex justify-content-between align-items-start">
349            <div class="ms-2 me-auto">
350                In pattern, a tuple[str, bool] is expected as a return type from exclude static function. <br>
351            </div></li>
352        <li class="list-group-item d-flex justify-content-between align-items-start">
353            <div class="ms-2 me-auto">
354                If on capture : bool (default=False), True is passed, it  encloses the regex syntax in parenthesis so that regex engine captures data.<br>
355            </div></li>
356        <li class="list-group-item d-flex justify-content-between align-items-start">
357            <div class="ms-2 me-auto">
358                The kwargs : dict accepts {<br>
359                    zeroormore : bool  (default=False), <br>
360                    oneormore : bool  (default=False)            
361            </div></li>
362            <li class="list-group-item d-flex justify-content-between align-items-start">
363                <div class="ms-2 me-auto">
364                    The return returns RegexGen
365                </div></li></ol><br>
366        <pre><code><p>regex = RegexGen().alphabets(1,5)</p></code></pre>           
367        <p>The regex is displayed as:</p>
368        <pre> regex = "[a-zA-Z]{1,5}"</pre>
369        This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently.
370        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
371        max : int => default = 0
372        pattern : a tuple[str, bool] expected a return type from exclude static function
373        capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data
374        kwargs : dict => {
375            zeroormore : bool => default=False,
376            oneormore : bool => default=False
377        }
378        return : RegexGen
379    '''
380        characterstr: str = str()
381        temp: str = str()
382
383        try:
384            temp = self.__add_quantifier(min, max, **kwargs)
385        except (...):
386            raise
387
388        if pattern is None:
389            characterstr = f"([a-zA-Z]{temp})" if capture else f"[a-zA-Z]{temp}"
390        elif pattern[1]:
391            characterstr = f"((?!{pattern[0]})a-zA-Z){temp}" if capture else f"(?:(?!{pattern[0]})a-zA-Z){temp}"
392        else:
393            characterstr = f"((?![{pattern[0]}])a-zA-Z){temp}\b" if capture else f"(?:(?![{pattern[0]}])a-zA-Z){temp}"
394
395        self.__regex_data += characterstr
396
397        return self
398
399    def get_non_capturing_regex(self) -> str:
400        '''
401        If the program have capture parameters it will prevent regex engine from capturing index and patterns from the string 
402        reducing capturing overhead and hence increase efficiency
403        return : str 
404    '''
405        return f"(?:{self.__regex_data})"
406
407    def get_regex_data(self) -> str:
408        '''
409        Returns a regex syntax that may capture the text from the input string. 
410        return : str
411    '''
412        return self.__regex_data
413
414    def combine(self, regex: Self) -> Self:
415        '''
416        <p><code>Combine</code> function creates a regex syntax to combine two regex expressions in one to create a pattern.<br>
417                </p>
418        In the function definition, 
419        <code>combine(self, regex: Self)</code>
420        , the function accepts value of two different regex to perform the combination operation. <br>
421        <pre><code><p>regexa = RegexGen().digits(4,4).text(RegexGen.characters('-'))
422            regexb = RegexGen().digits(3,3)
423            regex = RegexGen.combine(regexa, regexb) </p></code></pre>           
424        
425        <p>The regex is displayed as:</p>
426        <pre> regex = "\d{4,4}-\d{3,3}"</pre>
427        regex : RegexGen //Object that has regex syntax which is addable
428        return : RegexGen 
429    '''
430        if len(regex.__regex_data) == 0:  # and regex.__regex_data[0] == '^':
431            raise Exception("Invalid regex to combine")
432
433        self.__regex_data += regex.__regex_data
434
435        return self
436
437    @staticmethod
438    def any_of(characters: str, capture: bool = False, **kwargs) -> str:
439        '''
440        <p><code>Any of</code> function is any_of_the_block with quantifiers or simply put, this function defines repetition of words in the list.</p> <br>
441        In the function definition, <code>any_of(characters: str, capture: bool = False, **kwargs) -> str:</code>,
442        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
443        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
444        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
445        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
446        <li>The return returns RegexGen.</li></ol>
447
448        '''
449        if valid_ranges(characters, is_number, is_lower_case, is_upper_case) or characters.find(RegexGen.symbolsrange) != -1:
450            pass
451
452        for character in characters:
453            if not type("a").isascii(character):
454                raise Exception("In function {}, character : {} => Non ascii character is not acceptable".format(
455                    RegexGen.any_of.__name__, character))
456        return f"([{characters}])" if capture else f"(?:[{characters}])"
457
458    @staticmethod
459    def characters(char: str) -> str:
460        '''
461        <p><code>Characters</code> function is a static function which is unable to create a regex syntax.<br>
462        Instead, a function like Text is used to submit to the regex syntax. <br>
463        Characters is used some characters predefined in the regex library are used and thus they need to be escaped.
464        <pre><code><p>RegexGen.text(RegexGen.characters("This+is{a$text.") </p></code></pre>           
465                        
466                        <p>The regex is displayed as:</p>
467                        <samp> regex = "This\+is\{a\$text\."</samp><br>
468        some characters are predefined in the regex library thus they need to be escaped
469        return : str 
470        </p>
471    '''
472        letters: str = str()
473        if not char:
474            raise Exception("In function {}, character : {} => Input cannot be none ".format(
475                RegexGen.character.__name__, char))
476
477        predefined_symbols: set = {
478            '\\', '.', '(', ')', '*', '{', '}', '^', '+', '?', '[', ']', '$', '|'}
479
480        for lettr in char:
481            if lettr in predefined_symbols:
482                letters += f"\\{lettr}"
483            else:
484                letters += lettr
485        return letters
486
487    def succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
488        '''
489        This function is used to match the pattern succeeded by another pattern.<br>
490        In the function definition, <code>succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>,
491        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
492        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
493        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
494        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
495        <li>The return returns RegexGen.</li></ol>
496    '''
497        if not preceeding or len(preceeding) != 2:
498            raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format(
499                RegexGen.succeeded_by.__name__))
500        if not succeeding or len(succeeding) != 2:
501            raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format(
502                RegexGen.succeeded_by.__name__))
503
504        characterstr: str = str()
505        temp: str = str()
506
507        try:
508            temp = self.__add_quantifier(min, max, **kwargs)
509        except (...):
510            raise
511
512        followblock: str = str()
513        if invert:
514            followblock = f"(?!{succeeding[0]})" if succeeding[1] else f"(?![{succeeding[0]}])"
515        else:
516            followblock = f"(?={succeeding[0]})" if succeeding[1] else f"(?=[{succeeding[0]}])"
517
518        precedingblock: str = f"{preceeding[0]}{temp}" if preceeding[1] else f"[{preceeding[0]}]{temp}"
519
520        if len(self.__regex_data) > len(precedingblock) and \
521                self.__regex_data.rindex(precedingblock) == len(self.__regex_data)-len(precedingblock)-1:
522            characterstr += followblock
523            self.__regex_data = self.__regex_data[:-1]
524            characterstr += ')'
525        else:
526            characterstr = precedingblock + followblock
527            characterstr = f"({characterstr})" if capture else f"(?:{characterstr})"
528        self.__regex_data += characterstr
529        return self
530
531    def preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
532        '''
533        This function is used to match pattern that is preceded by another pattern.<br>
534        If the pattern of the succeeded_by and preceeded_by matches the combination is union. <br>
535      In the function definition, <code>preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>,
536        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
537        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
538        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
539        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
540        <li>The return returns RegexGen.</li></ol>
541    '''
542        if not preceding or len(preceding) != 2:
543            raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format(
544                RegexGen.preceded_by.__name__))
545        if not succeeding or len(succeeding) != 2:
546            raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format(
547                RegexGen.preceded_by.__name__))
548
549        characterstr: str = str()
550        temp: str = str()
551
552        try:
553            temp = self.__add_quantifier(min, max, **kwargs)
554        except (...):
555            raise
556
557        preceedingblock: str = str()
558        if invert:
559            preceedingblock = f"(?<!{preceding[0]})" if preceding[1] else f"(?<![{preceding[0]}])"
560        else:
561            preceedingblock = f"(?<={preceding[0]})" if preceding[1] else f"(?<=[{preceding[0]}])"
562
563        followblock: str = f"{succeeding[0]}{temp}" if succeeding[1] else f"[{succeeding[0]}]{temp}"
564        characterstr = preceedingblock + followblock
565        characterstr = f"({characterstr})" if capture else f"(?:{characterstr})"
566        self.__regex_data += characterstr
567        return self
568
569    # @staticmethod
570    # def any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str:
571    #     '''
572    #     This function is any_of_the_block with quantifiers or this function defines repetition of words in the list. <br>
573    #     In the function definition, <code>any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str:</code>,
574    #     <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
575    #     <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
576    #     <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
577    #     <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
578    #     <li>The return returns RegexGen.</li></ol>
579    #     '''
580    #     character_str = str()
581    #     tempstr = str()
582
583    #     if not len(characters):
584    #         return ""
585
586    #     character_pair = list()
587    #     for index, listitem in enumerate(characters):
588    #         character = listitem.pop("character", None)
589    #         min = listitem.pop("min", 0)
590    #         max = listitem.pop("max", 0)
591    #         if character is None:
592    #             raise Exception("In function {}, at index {} doesn't have character pair.".format(
593    #                 RegexGen.any_of.__name__, index))
594    #         if len(character) == 0:
595    #             continue
596    #         elif len(character) == 1 or (len(character) == 2 and character in {"\s", "\d", "\w", "\W"}):
597    #             pass
598    #         elif len(character) == 3 and valid_ranges(character, is_lower_case, is_number, is_upper_case):
599    #             pass
600    #         else:
601    #             raise Exception("In function {}, at index {}, Unknown Character: {}.".format(
602    #                 RegexGen.any_of.__name__, index, character))
603    #         tempstr = RegexGen.__add_quantifier(min=min, max=max, **listitem)
604    #         character_pair.append(character+tempstr)
605
606    #     character_str = "|".join(character_pair)
607    #     return f"({character_str})" if capture else f"(?:{character_str})"

class RegexGen: View Source

 10class RegexGen:
 11    """
 12    Start of the class
 13    """
 14    # ranges
 15    lowercaserange: str = "[a-z]"
 16    uppercaserange: str = "[A-Z]"
 17    digitsrange: str = "[0-9]"
 18    symbolsrange: str = "\W"
 19    alphanumeric: str = "\w"
 20    # ecape sequences
 21    block_word: str = "\b"
 22    nonblock_word: str = "\B"
 23    new_line: str = "\n"
 24    tab_space: str = "\t"
 25    carriage_return: str = "\r"
 26    whitespace = "\s"
 27
 28    def __init__(self):
 29        self.__regex_data: str = str()
 30
 31
 32
 33    def linestartwith(self):
 34        '''
 35        <code>linestartwith</code> adds a expression to indicate beginning of the string and is always added on a new line.<br></p>
 36        The function definition is:  
 37            `linestartwith(self)`
 38        <p>When this function is called, the function adds the expression '^' if no regex data exists already.</p>
 39        <p>If the regex data already exists then the function adds the expression '\\n^' indicating line is changed before signifying beginning of the string.  </p>
 40        <p><code>Linestartwith</code> function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.<br>
 41        </p>
 42        ```
 43        regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline()
 44        ```       
 45        <p>This code checks for text starting with 'foo' and starting with 'bar' after any characters of length of min 1 and max 5 digits.</p>
 46        <p>The regex is displayed as:</p>
 47        <samp> regex = "^foo.{1,5}bar"</samp></p>
 48    ^ character symbolizes the start of the string or line.
 49    '''
 50        if not len(self.__regex_data):
 51            self.__regex_data += '^'
 52        else:
 53            self.__regex_data += '\n^'
 54        return self
 55
 56
 57
 58    def endofline(self):
 59        '''
 60        <p><code>endofline</code> adds a expression to indicates end of the string and end of the line.<br>
 61            The function definition is: 
 62          <code>
 63           endofline(self) 
 64           </code>
 65        <p>When this function is called, the function adds the expression '$' to the regex data.
 66        <p>If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string.  
 67        <p><code>endofline</code> function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.<br>
 68        <pre><code><p>regex = RegexGen().regex.text('abc', 1, 1).endofline()</p></code></pre>           
 69        <p>This code checks for text ending with 'abc' after any characters of any length 
 70        <p>The regex is displayed as:<br>
 71        <samp> regex = "abc$"</samp><br>
 72        $ character symbolizes as end of a line
 73        </p></p></p></p></p>
 74        '''
 75        self.__regex_data += '$'
 76        return self
 77    
 78    @staticmethod
 79    def range(start: str, end: str) -> str:
 80        '''
 81        <code>Range</code> function provides syntax for defining range.<br><br>
 82            In the function definition, 
 83            <code>range(start: str, end: str)</code> <br>
 84                    1. start accepts string for starting range whose length must be 1.                           
 85                    2. end accepts string for ending range whose length must be 1.                             
 86                    3. return returns the range in format <start>-<end>
 87        
 88        <pre><code><p>regex = RegexGen().regex.range('a', 'z')</p></code></pre>           
 89        <p>The regex is displayed as:</p>
 90        <samp> regex = "[a-z]"</samp><br>
 91        Range for symbols will throw an error
 92        start : str // length must be 1
 93        end : str  //length must be 1
 94        return : str //returns the range in format <start>-<end>
 95    '''
 96        if (not start and not end) and (len(start) > 1 and len(end) > 1):
 97            raise Exception("In function {}, range_start : {}, range_end:{} => Characters cannot be None".format(
 98                RegexGen.range.__name__, start, end))
 99
100        # check if range is valid
101        character_range = f"({start}-{end})"
102        if (valid_ranges(character_range, is_lower_case, is_upper_case, is_number)):
103            return character_range
104        raise Exception("In function {}, range_start : {}, range_end:{} => This is not a valid range. Valid ranges are 0-9,A-Z or a-z or \W".format(
105            RegexGen.range.__name__, start, end))
106
107
108    @staticmethod
109    def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]:
110        '''
111        <code>Exclude</code> function is a static function. It excludes certain pattern based upon the input of the user.<br>
112        In the function definition, <code>exclude(characters: str, pattern_prevent: bool = False) -> tuple</code>,
113            <ol><li> the characters : str signifies characters to be matched,</li> 
114            <li>pattern_prevent : str (default = False) Here, on True, prevents the characters sequence to match(The sequence must not contain a range) 
115                    and on false prevent piecewise occuring of characters.
116                    and returns a tuple   </li>                 
117                
118            
119    <pre><code><p>RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True) </p></code></pre>           
120    
121    <p>The regex is displayed as:<br>
122    <samp> regex = "\b(?:(?![23])\d)+\b"</samp><br>
123        characters : str  //characters to be matched
124        pattern_prevent : bool  => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range) 
125                                                //and on false prevent piecewise occuring of characters.
126        return : tuple
127    '''
128        if not characters:
129            raise Exception("In function {}, Character : {} => Characters cannot be None".format(
130                RegexGen.exclude.__name__, characters))
131        # check if charaters is a range
132        try:
133            if valid_ranges(characters, is_lower_case, is_upper_case, is_number) or characters.find(RegexGen.symbolsrange) != -1:
134                pattern_prevent = False
135        except (...):
136            raise
137
138        return characters, pattern_prevent
139
140    @staticmethod
141    def boundary_character(character: str, start: bool = True) -> str:
142        '''
143        <code>Boundary character</code> gives left or right boundary of the word as required.<br>
144        In the function definition, <code> boundary_character(character: str, start: bool = True) -> str: </code><br>
145        <ol>
146        <li> character: str signifies characters to be matched</li>
147        <li>start : bool (default = True) On true, the letter is the left boundary of the word<br> 
148                                        and on false the letter is the right boundary of the word.
149    '''
150        if len(character) > 2:
151            raise Exception("In function {}, start : {} => Character cannot be length greater than two",
152                            RegexGen.boundary_character.__name__, start)
153        elif len(character) == 2 and character not in {"\w", "\W", "\d", "\."}:
154            raise Exception("In function {}, start : {} => Character is not a \w or \W or \d or \.",
155                            RegexGen.boundary_character.__name__, start)
156
157        character_str = "\b" + character if start else character + "\b"
158        
159        return character_str
160
161    
162    def add_quantifier(self, min: int, max: int, **kwargs) -> str:
163        """
164            <code>__add_quantifier</code>adds quantifiers like ? + * x(n,m).<br>
165        The function definition is:  <code>__add_quantifier(self, min: int, max: int, **kwargs)</code>.<br>
166        The regex generated depends on the value of min and max.
167            <ol>
168                <li>min == max and max == 0:<br>
169                    If no characters exist then the exception is raised stating min and max can't be zero.       
170                    <pre> regex = " "</pre>         </li>      
171                 <li>max == min and min == 1:
172                    <pre> regex = "^foo.{1,5}bar"</pre></li>
173                <li>max == min:
174                    <pre> regex = "^foo.{1,5}bar"</pre></li>                
175                <li>min == 0 and max == 1:
176                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
177                <>max == 0 and min > 0:
178                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
179                <li>max > min and min > 0:
180                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
181                <li>Else:  
182                    If no characters exist then the exception is raised stating min and max can't be zero.</li>                      
183                </ol>
184        <pre> regex = "^foo.{1,5}bar"</pre>
185        Add Quantifiers like {0},{0,1},?,*,+,{0,1}
186    """
187        regexchar: str = str()
188
189        if min == max and max == 0:
190            zeroormore = kwargs.get("zeroormore", False)
191            oneormore = kwargs.get("oneormore", False)
192            if zeroormore:
193                regexchar += '*'
194            elif oneormore:
195                regexchar += '+'
196            else:
197                raise Exception("In function {} => Min And Max Cannot be Zero"
198                                .format(self.__add_quantifier.__name__))
199        elif max == min and min == 1:
200            regexchar = ""
201        elif max == min:
202            regexchar = f"{{{min}}}"
203        elif min == 0 and max == 1:
204            regexchar = "?"
205        elif max == 0 and min > 0:
206            regexchar = f"{{{min},}}"
207        elif max > min and min > 0:
208            regexchar = f"{{{min},{max}}}"
209        else:
210            regexchar = f"{{,{max}}}"
211
212        return regexchar
213
214    def text(self, character: str, min: int = 0, max: int = 0, **kwargs) -> Self:
215        ''' 
216        <p><code>Text</code> function simply adds the input to regex syntax.</p>
217        <pre><code><p>RegexGen.text("This is a text.") </p></code></pre>           
218        
219        <p>The regex is displayed as:</p>
220        <pre> regex = "This is a text."</pre>
221        Text is generated using Characters function.
222        character : str // A character can be a word, alphabet, a digit or number and symbols or a range
223        min : int => default = 0  // if min and max are both zero it must pass a keyword argument as True 
224        max : int  => default = 0
225        capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data
226        kwargs : dict => {
227            zeroormore : bool => default=False,
228            oneormore : bool => default=False
229        }
230        return : RegexGen
231    '''
232        letterstr: str = str()
233        temp: str = str()
234
235        if not character:
236            raise Exception("In function {}, Character : {} => Character cannot be None".format(
237                self.text.__name__, character))
238
239        letterstr = character
240
241        try:
242            temp = self.__add_quantifier(min, max, **kwargs)
243        except Exception as e:
244            raise
245
246        self.__regex_data += letterstr
247        self.__regex_data += temp
248
249        return self
250
251    def any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:
252        ''' 
253        <p><code>Any</code> function generates a regex which can be utilized to check if a certain character exists in the expression<br>
254            In the function definition, 
255            <code>any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:</code>
256            ,
257            <ol class="list-group list-group-numbered">
258                <li class="list-group-item d-flex justify-content-between align-items-start">
259                    <div class="ms-2 me-auto">
260                        the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br>
261                    </div></li>
262                <li class="list-group-item d-flex justify-content-between align-items-start">
263                    <div class="ms-2 me-auto">
264                        If on capture : bool (default=False) True is passed, it  enclose . in parenthesis so that regex engine capture data. <br>
265                    </div></li>
266                <li class="list-group-item d-flex justify-content-between align-items-start">
267                    <div class="ms-2 me-auto">
268                        The kwargs : dict accepts {<br>
269                            zeroormore : bool  (default=False), <br>
270                            oneormore : bool  (default=False)            
271                    </div></li>
272                    <li class="list-group-item d-flex justify-content-between align-items-start">
273                        <div class="ms-2 me-auto">
274                            The return returns RegexGen
275                        </div></li></ol><br></p>
276        <pre><code>regex = RegexGen()
277            regex = regex.any(min=0, max=12)</code></pre>           
278        
279        <p>The regex is displayed as:</p>
280        <pre>  regex = ".{0,12}"</pre> 
281        . character symbolizes any character
282        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
283        max : int => default = 0
284        capture : bool => default=False //On True enclose . in parenthesis so that regex engine capture data
285        kwargs : dict => {
286            zeroormore : bool => default=False,
287            oneormore : bool => default=False
288        }
289        return : RegexGen
290    '''
291        anystr: str = str()
292        temp: str = str()
293
294        try:
295            temp = self.__add_quantifier(min, max, **kwargs)
296        except (...):
297            raise
298
299        anystr = f"(.{temp})" if capture else f".{temp}"
300        self.__regex_data += anystr
301
302        return self
303
304
305    def digits(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self:
306        '''
307        This function is used to match only numbers that may not contain a sequence of number or the each numbers existing independently.
308        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
309        max : int => default = 0
310        pattern : a tuple[str, bool] expected a return type from exclude static function
311        capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data
312        kwargs : dict => {
313            zeroormore : bool => default=False,
314            oneormore : bool => default=False
315        }
316        return : RegexGen
317    '''
318        digitstr: str = str()
319        temp: str = str()
320
321        try:
322            temp = self.__add_quantifier(min, max, **kwargs)
323        except (...):
324            raise
325
326        if pattern is None:
327            digitstr = f"(\d{temp})" if capture else f"\d{temp}"
328        elif pattern[1]:
329            digitstr = f"((?!{pattern[0]})\d){temp}" if capture else f"(?:(?!{pattern[0]})\d){temp}"
330        else:
331            digitstr = f"((?![{pattern[0]}])\d){temp}" if capture else f"(?:(?![{pattern[0]}])\d){temp}"
332
333        self.__regex_data += digitstr
334
335        return self
336
337    def alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self:
338        '''
339        <p><code>Alphabets</code> function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.<br>
340                </p>
341        In the function definition, 
342        <code>alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs)</code>
343        , <br><br>
344        <ol class="list-group list-group-numbered">
345        <li class="list-group-item d-flex justify-content-between align-items-start">
346            <div class="ms-2 me-auto">
347                the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br>
348            </div></li>
349        <li class="list-group-item d-flex justify-content-between align-items-start">
350            <div class="ms-2 me-auto">
351                In pattern, a tuple[str, bool] is expected as a return type from exclude static function. <br>
352            </div></li>
353        <li class="list-group-item d-flex justify-content-between align-items-start">
354            <div class="ms-2 me-auto">
355                If on capture : bool (default=False), True is passed, it  encloses the regex syntax in parenthesis so that regex engine captures data.<br>
356            </div></li>
357        <li class="list-group-item d-flex justify-content-between align-items-start">
358            <div class="ms-2 me-auto">
359                The kwargs : dict accepts {<br>
360                    zeroormore : bool  (default=False), <br>
361                    oneormore : bool  (default=False)            
362            </div></li>
363            <li class="list-group-item d-flex justify-content-between align-items-start">
364                <div class="ms-2 me-auto">
365                    The return returns RegexGen
366                </div></li></ol><br>
367        <pre><code><p>regex = RegexGen().alphabets(1,5)</p></code></pre>           
368        <p>The regex is displayed as:</p>
369        <pre> regex = "[a-zA-Z]{1,5}"</pre>
370        This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently.
371        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
372        max : int => default = 0
373        pattern : a tuple[str, bool] expected a return type from exclude static function
374        capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data
375        kwargs : dict => {
376            zeroormore : bool => default=False,
377            oneormore : bool => default=False
378        }
379        return : RegexGen
380    '''
381        characterstr: str = str()
382        temp: str = str()
383
384        try:
385            temp = self.__add_quantifier(min, max, **kwargs)
386        except (...):
387            raise
388
389        if pattern is None:
390            characterstr = f"([a-zA-Z]{temp})" if capture else f"[a-zA-Z]{temp}"
391        elif pattern[1]:
392            characterstr = f"((?!{pattern[0]})a-zA-Z){temp}" if capture else f"(?:(?!{pattern[0]})a-zA-Z){temp}"
393        else:
394            characterstr = f"((?![{pattern[0]}])a-zA-Z){temp}\b" if capture else f"(?:(?![{pattern[0]}])a-zA-Z){temp}"
395
396        self.__regex_data += characterstr
397
398        return self
399
400    def get_non_capturing_regex(self) -> str:
401        '''
402        If the program have capture parameters it will prevent regex engine from capturing index and patterns from the string 
403        reducing capturing overhead and hence increase efficiency
404        return : str 
405    '''
406        return f"(?:{self.__regex_data})"
407
408    def get_regex_data(self) -> str:
409        '''
410        Returns a regex syntax that may capture the text from the input string. 
411        return : str
412    '''
413        return self.__regex_data
414
415    def combine(self, regex: Self) -> Self:
416        '''
417        <p><code>Combine</code> function creates a regex syntax to combine two regex expressions in one to create a pattern.<br>
418                </p>
419        In the function definition, 
420        <code>combine(self, regex: Self)</code>
421        , the function accepts value of two different regex to perform the combination operation. <br>
422        <pre><code><p>regexa = RegexGen().digits(4,4).text(RegexGen.characters('-'))
423            regexb = RegexGen().digits(3,3)
424            regex = RegexGen.combine(regexa, regexb) </p></code></pre>           
425        
426        <p>The regex is displayed as:</p>
427        <pre> regex = "\d{4,4}-\d{3,3}"</pre>
428        regex : RegexGen //Object that has regex syntax which is addable
429        return : RegexGen 
430    '''
431        if len(regex.__regex_data) == 0:  # and regex.__regex_data[0] == '^':
432            raise Exception("Invalid regex to combine")
433
434        self.__regex_data += regex.__regex_data
435
436        return self
437
438    @staticmethod
439    def any_of(characters: str, capture: bool = False, **kwargs) -> str:
440        '''
441        <p><code>Any of</code> function is any_of_the_block with quantifiers or simply put, this function defines repetition of words in the list.</p> <br>
442        In the function definition, <code>any_of(characters: str, capture: bool = False, **kwargs) -> str:</code>,
443        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
444        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
445        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
446        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
447        <li>The return returns RegexGen.</li></ol>
448
449        '''
450        if valid_ranges(characters, is_number, is_lower_case, is_upper_case) or characters.find(RegexGen.symbolsrange) != -1:
451            pass
452
453        for character in characters:
454            if not type("a").isascii(character):
455                raise Exception("In function {}, character : {} => Non ascii character is not acceptable".format(
456                    RegexGen.any_of.__name__, character))
457        return f"([{characters}])" if capture else f"(?:[{characters}])"
458
459    @staticmethod
460    def characters(char: str) -> str:
461        '''
462        <p><code>Characters</code> function is a static function which is unable to create a regex syntax.<br>
463        Instead, a function like Text is used to submit to the regex syntax. <br>
464        Characters is used some characters predefined in the regex library are used and thus they need to be escaped.
465        <pre><code><p>RegexGen.text(RegexGen.characters("This+is{a$text.") </p></code></pre>           
466                        
467                        <p>The regex is displayed as:</p>
468                        <samp> regex = "This\+is\{a\$text\."</samp><br>
469        some characters are predefined in the regex library thus they need to be escaped
470        return : str 
471        </p>
472    '''
473        letters: str = str()
474        if not char:
475            raise Exception("In function {}, character : {} => Input cannot be none ".format(
476                RegexGen.character.__name__, char))
477
478        predefined_symbols: set = {
479            '\\', '.', '(', ')', '*', '{', '}', '^', '+', '?', '[', ']', '$', '|'}
480
481        for lettr in char:
482            if lettr in predefined_symbols:
483                letters += f"\\{lettr}"
484            else:
485                letters += lettr
486        return letters
487
488    def succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
489        '''
490        This function is used to match the pattern succeeded by another pattern.<br>
491        In the function definition, <code>succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>,
492        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
493        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
494        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
495        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
496        <li>The return returns RegexGen.</li></ol>
497    '''
498        if not preceeding or len(preceeding) != 2:
499            raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format(
500                RegexGen.succeeded_by.__name__))
501        if not succeeding or len(succeeding) != 2:
502            raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format(
503                RegexGen.succeeded_by.__name__))
504
505        characterstr: str = str()
506        temp: str = str()
507
508        try:
509            temp = self.__add_quantifier(min, max, **kwargs)
510        except (...):
511            raise
512
513        followblock: str = str()
514        if invert:
515            followblock = f"(?!{succeeding[0]})" if succeeding[1] else f"(?![{succeeding[0]}])"
516        else:
517            followblock = f"(?={succeeding[0]})" if succeeding[1] else f"(?=[{succeeding[0]}])"
518
519        precedingblock: str = f"{preceeding[0]}{temp}" if preceeding[1] else f"[{preceeding[0]}]{temp}"
520
521        if len(self.__regex_data) > len(precedingblock) and \
522                self.__regex_data.rindex(precedingblock) == len(self.__regex_data)-len(precedingblock)-1:
523            characterstr += followblock
524            self.__regex_data = self.__regex_data[:-1]
525            characterstr += ')'
526        else:
527            characterstr = precedingblock + followblock
528            characterstr = f"({characterstr})" if capture else f"(?:{characterstr})"
529        self.__regex_data += characterstr
530        return self
531
532    def preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
533        '''
534        This function is used to match pattern that is preceded by another pattern.<br>
535        If the pattern of the succeeded_by and preceeded_by matches the combination is union. <br>
536      In the function definition, <code>preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>,
537        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
538        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
539        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
540        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
541        <li>The return returns RegexGen.</li></ol>
542    '''
543        if not preceding or len(preceding) != 2:
544            raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format(
545                RegexGen.preceded_by.__name__))
546        if not succeeding or len(succeeding) != 2:
547            raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format(
548                RegexGen.preceded_by.__name__))
549
550        characterstr: str = str()
551        temp: str = str()
552
553        try:
554            temp = self.__add_quantifier(min, max, **kwargs)
555        except (...):
556            raise
557
558        preceedingblock: str = str()
559        if invert:
560            preceedingblock = f"(?<!{preceding[0]})" if preceding[1] else f"(?<![{preceding[0]}])"
561        else:
562            preceedingblock = f"(?<={preceding[0]})" if preceding[1] else f"(?<=[{preceding[0]}])"
563
564        followblock: str = f"{succeeding[0]}{temp}" if succeeding[1] else f"[{succeeding[0]}]{temp}"
565        characterstr = preceedingblock + followblock
566        characterstr = f"({characterstr})" if capture else f"(?:{characterstr})"
567        self.__regex_data += characterstr
568        return self
569
570    # @staticmethod
571    # def any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str:
572    #     '''
573    #     This function is any_of_the_block with quantifiers or this function defines repetition of words in the list. <br>
574    #     In the function definition, <code>any_of(characters: tuple[dict], capture: bool = False, **kwargs) -> str:</code>,
575    #     <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
576    #     <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
577    #     <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
578    #     <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
579    #     <li>The return returns RegexGen.</li></ol>
580    #     '''
581    #     character_str = str()
582    #     tempstr = str()
583
584    #     if not len(characters):
585    #         return ""
586
587    #     character_pair = list()
588    #     for index, listitem in enumerate(characters):
589    #         character = listitem.pop("character", None)
590    #         min = listitem.pop("min", 0)
591    #         max = listitem.pop("max", 0)
592    #         if character is None:
593    #             raise Exception("In function {}, at index {} doesn't have character pair.".format(
594    #                 RegexGen.any_of.__name__, index))
595    #         if len(character) == 0:
596    #             continue
597    #         elif len(character) == 1 or (len(character) == 2 and character in {"\s", "\d", "\w", "\W"}):
598    #             pass
599    #         elif len(character) == 3 and valid_ranges(character, is_lower_case, is_number, is_upper_case):
600    #             pass
601    #         else:
602    #             raise Exception("In function {}, at index {}, Unknown Character: {}.".format(
603    #                 RegexGen.any_of.__name__, index, character))
604    #         tempstr = RegexGen.__add_quantifier(min=min, max=max, **listitem)
605    #         character_pair.append(character+tempstr)
606
607    #     character_str = "|".join(character_pair)
608    #     return f"({character_str})" if capture else f"(?:{character_str})"

Start of the class

RegexGen() View Source

28    def __init__(self):
29        self.__regex_data: str = str()

def linestartwith(self): View Source

33    def linestartwith(self):
34        '''
35        <code>linestartwith</code> adds a expression to indicate beginning of the string and is always added on a new line.<br></p>
36        The function definition is:  
37            `linestartwith(self)`
38        <p>When this function is called, the function adds the expression '^' if no regex data exists already.</p>
39        <p>If the regex data already exists then the function adds the expression '\\n^' indicating line is changed before signifying beginning of the string.  </p>
40        <p><code>Linestartwith</code> function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.<br>
41        </p>
42        ```
43        regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline()
44        ```       
45        <p>This code checks for text starting with 'foo' and starting with 'bar' after any characters of length of min 1 and max 5 digits.</p>
46        <p>The regex is displayed as:</p>
47        <samp> regex = "^foo.{1,5}bar"</samp></p>
48    ^ character symbolizes the start of the string or line.
49    '''
50        if not len(self.__regex_data):
51            self.__regex_data += '^'
52        else:
53            self.__regex_data += '\n^'
54        return self

linestartwith adds an expression that marks the beginning of a line. When the pattern already has content, the anchor is added on a new line.

The function definition is:
linestartwith(self)

When this function is called, the function adds the expression '^' if no regex data exists already.

If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string.

Linestartwith function can be used in combination with other function to create a regex syntax to check if the expression starts with certain pattern.

regex = RegexGen().linestartwith().text('foo').any(1,5).text('bar').endofline()

This code checks for a line that starts with 'foo', is followed by 1 to 5 arbitrary characters, and then ends with 'bar'.

The regex is displayed as:

regex = "^foo.{1,5}bar$"

^ character symbolizes the start of the string or line.

def endofline(self): View Source

58    def endofline(self):
59        '''
60        <p><code>endofline</code> adds a expression to indicates end of the string and end of the line.<br>
61            The function definition is: 
62          <code>
63           endofline(self) 
64           </code>
65        <p>When this function is called, the function adds the expression '$' to the regex data.
66        <p>If the regex data already exists then the function adds the expression '\n^' indicating line is changed before signifying beginning of the string.  
67        <p><code>endofline</code> function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.<br>
68        <pre><code><p>regex = RegexGen().regex.text('abc', 1, 1).endofline()</p></code></pre>           
69        <p>This code checks for text ending with 'abc' after any characters of any length 
70        <p>The regex is displayed as:<br>
71        <samp> regex = "abc$"</samp><br>
72        $ character symbolizes as end of a line
73        </p></p></p></p></p>
74        '''
75        self.__regex_data += '$'
76        return self

endofline adds an expression that marks the end of the string or the end of a line.
The function definition is: endofline(self)

When this function is called, the function adds the expression '$' to the regex data.

The '$' is appended whether or not regex data already exists; unlike linestartwith, no newline is inserted before it.

endofline function can be used in combination with other function to create a regex syntax to check if the expression ends with certain pattern.

regex = RegexGen().regex.text('abc', 1, 1).endofline()

This code checks for text ending with 'abc' after any characters of any length

The regex is displayed as:
regex = "abc$"
$ character symbolizes as end of a line

@staticmethod

def range(start: str, end: str) -> str: View Source

 78    @staticmethod
 79    def range(start: str, end: str) -> str:
 80        '''
 81        <code>Range</code> function provides syntax for defining range.<br><br>
 82            In the function definition, 
 83            <code>range(start: str, end: str)</code> <br>
 84                    1. start accepts string for starting range whose length must be 1.                           
 85                    2. end accepts string for ending range whose length must be 1.                             
 86                    3. return returns the range in format <start>-<end>
 87        
 88        <pre><code><p>regex = RegexGen().regex.range('a', 'z')</p></code></pre>           
 89        <p>The regex is displayed as:</p>
 90        <samp> regex = "[a-z]"</samp><br>
 91        Range for symbols will throw an error
 92        start : str // length must be 1
 93        end : str  //length must be 1
 94        return : str //returns the range in format <start>-<end>
 95    '''
 96        if (not start and not end) and (len(start) > 1 and len(end) > 1):
 97            raise Exception("In function {}, range_start : {}, range_end:{} => Characters cannot be None".format(
 98                RegexGen.range.__name__, start, end))
 99
100        # check if range is valid
101        character_range = f"({start}-{end})"
102        if (valid_ranges(character_range, is_lower_case, is_upper_case, is_number)):
103            return character_range
104        raise Exception("In function {}, range_start : {}, range_end:{} => This is not a valid range. Valid ranges are 0-9,A-Z or a-z or \W".format(
105            RegexGen.range.__name__, start, end))

Range function provides syntax for defining range.

In the function definition, range(start: str, end: str)
1. start accepts string for starting range whose length must be 1.
2. end accepts string for ending range whose length must be 1.
3. return returns the range in the format <start>-<end>

regex = RegexGen().regex.range('a', 'z')

The regex is displayed as:

regex = "[a-z]"
A range of symbols raises an error. start : str // length must be 1. end : str // length must be 1. return : str // returns the range in the format <start>-<end>

@staticmethod

def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]: View Source

108    @staticmethod
109    def exclude(characters: str, pattern_prevent: bool = False) -> Tuple[str, bool]:
110        '''
111        <code>Exclude</code> function is a static function. It excludes certain pattern based upon the input of the user.<br>
112        In the function definition, <code>exclude(characters: str, pattern_prevent: bool = False) -> tuple</code>,
113            <ol><li> the characters : str signifies characters to be matched,</li> 
114            <li>pattern_prevent : str (default = False) Here, on True, prevents the characters sequence to match(The sequence must not contain a range) 
115                    and on false prevent piecewise occuring of characters.
116                    and returns a tuple   </li>                 
117                
118            
119    <pre><code><p>RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True) </p></code></pre>           
120    
121    <p>The regex is displayed as:<br>
122    <samp> regex = "\b(?:(?![23])\d)+\b"</samp><br>
123        characters : str  //characters to be matched
124        pattern_prevent : bool  => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range) 
125                                                //and on false prevent piecewise occuring of characters.
126        return : tuple
127    '''
128        if not characters:
129            raise Exception("In function {}, Character : {} => Characters cannot be None".format(
130                RegexGen.exclude.__name__, characters))
131        # check if charaters is a range
132        try:
133            if valid_ranges(characters, is_lower_case, is_upper_case, is_number) or characters.find(RegexGen.symbolsrange) != -1:
134                pattern_prevent = False
135        except (...):
136            raise
137
138        return characters, pattern_prevent

Exclude function is a static function. It excludes certain pattern based upon the input of the user.
In the function definition, exclude(characters: str, pattern_prevent: bool = False) -> tuple,

the characters : str signifies characters to be matched,
pattern_prevent : bool (default = False). On True, the characters are prevented from matching as one sequence (the sequence must not contain a range); on False, each of the characters is prevented individually. The function returns a tuple.

RegexGen().digits(1, 10, RegexGen.exclude("23", True), capture=True)

The regex is displayed as:
regex = "(?:(?![23])\d)+"
characters : str //characters to be matched pattern_prevent : bool => default = False //On True, prevents the characters sequence to match(The sequence must not contain a range) //and on false prevent piecewise occuring of characters. return : tuple

@staticmethod

def boundary_character(character: str, start: bool = True) -> str: View Source

140    @staticmethod
141    def boundary_character(character: str, start: bool = True) -> str:
142        '''
143        <code>Boundary character</code> gives left or right boundary of the word as required.<br>
144        In the function definition, <code> boundary_character(character: str, start: bool = True) -> str: </code><br>
145        <ol>
146        <li> character: str signifies characters to be matched</li>
147        <li>start : bool (default = True) On true, the letter is the left boundary of the word<br> 
148                                        and on false the letter is the right boundary of the word.
149    '''
150        if len(character) > 2:
151            raise Exception("In function {}, start : {} => Character cannot be length greater than two",
152                            RegexGen.boundary_character.__name__, start)
153        elif len(character) == 2 and character not in {"\w", "\W", "\d", "\."}:
154            raise Exception("In function {}, start : {} => Character is not a \w or \W or \d or \.",
155                            RegexGen.boundary_character.__name__, start)
156
157        character_str = "\b" + character if start else character + "\b"
158        
159        return character_str

Boundary character gives left or right boundary of the word as required.
In the function definition, boundary_character(character: str, start: bool = True) -> str:

character: str signifies characters to be matched
start : bool (default = True) On true, the letter is the left boundary of the word
and on false the letter is the right boundary of the word.

def add_quantifier(self, min: int, max: int, **kwargs) -> str: View Source

162    def add_quantifier(self, min: int, max: int, **kwargs) -> str:
163        """
164            <code>__add_quantifier</code>adds quantifiers like ? + * x(n,m).<br>
165        The function definition is:  <code>__add_quantifier(self, min: int, max: int, **kwargs)</code>.<br>
166        The regex generated depends on the value of min and max.
167            <ol>
168                <li>min == max and max == 0:<br>
169                    If no characters exist then the exception is raised stating min and max can't be zero.       
170                    <pre> regex = " "</pre>         </li>      
171                 <li>max == min and min == 1:
172                    <pre> regex = "^foo.{1,5}bar"</pre></li>
173                <li>max == min:
174                    <pre> regex = "^foo.{1,5}bar"</pre></li>                
175                <li>min == 0 and max == 1:
176                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
177                <>max == 0 and min > 0:
178                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
179                <li>max > min and min > 0:
180                    <pre> regex = "^foo.{1,5}bar"</pre></li>  
181                <li>Else:  
182                    If no characters exist then the exception is raised stating min and max can't be zero.</li>                      
183                </ol>
184        <pre> regex = "^foo.{1,5}bar"</pre>
185        Add Quantifiers like {0},{0,1},?,*,+,{0,1}
186    """
187        regexchar: str = str()
188
189        if min == max and max == 0:
190            zeroormore = kwargs.get("zeroormore", False)
191            oneormore = kwargs.get("oneormore", False)
192            if zeroormore:
193                regexchar += '*'
194            elif oneormore:
195                regexchar += '+'
196            else:
197                raise Exception("In function {} => Min And Max Cannot be Zero"
198                                .format(self.__add_quantifier.__name__))
199        elif max == min and min == 1:
200            regexchar = ""
201        elif max == min:
202            regexchar = f"{{{min}}}"
203        elif min == 0 and max == 1:
204            regexchar = "?"
205        elif max == 0 and min > 0:
206            regexchar = f"{{{min},}}"
207        elif max > min and min > 0:
208            regexchar = f"{{{min},{max}}}"
209        else:
210            regexchar = f"{{,{max}}}"
211
212        return regexchar

__add_quantifier adds quantifiers such as ?, +, * and {n,m}.
The function definition is: __add_quantifier(self, min: int, max: int, **kwargs).
The regex generated depends on the value of min and max.

min == max and max == 0:
'*' is returned when zeroormore=True, otherwise '+' when oneormore=True. If neither keyword is set, an exception is raised stating that min and max cannot both be zero.
```
 regex = "*"   or   regex = "+"
```

max == min and min == 1:
No quantifier is needed.
```
 regex = ""
```
max == min:
```
 regex = "{min}"        e.g. "{3}"
```

min == 0 and max == 1:
```
 regex = "?"
```

max == 0 and min > 0:
```
 regex = "{min,}"       e.g. "{2,}"
```

max > min and min > 0:
```
 regex = "{min,max}"    e.g. "{1,5}"
```

Else:
Only the upper bound is written.
```
 regex = "{,max}"       e.g. "{,12}"
```

Adds quantifiers like {n}, {n,}, {n,m}, {,m}, ?, *, +

def text( self, character: str, min: int = 0, max: int = 0, **kwargs) -> typing_extensions.Self: View Source

214    def text(self, character: str, min: int = 0, max: int = 0, **kwargs) -> Self:
215        ''' 
216        <p><code>Text</code> function simply adds the input to regex syntax.</p>
217        <pre><code><p>RegexGen.text("This is a text.") </p></code></pre>           
218        
219        <p>The regex is displayed as:</p>
220        <pre> regex = "This is a text."</pre>
221        Text is generated using Characters function.
222        character : str // A character can be a word, alphabet, a digit or number and symbols or a range
223        min : int => default = 0  // if min and max are both zero it must pass a keyword argument as True 
224        max : int  => default = 0
225        capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data
226        kwargs : dict => {
227            zeroormore : bool => default=False,
228            oneormore : bool => default=False
229        }
230        return : RegexGen
231    '''
232        letterstr: str = str()
233        temp: str = str()
234
235        if not character:
236            raise Exception("In function {}, Character : {} => Character cannot be None".format(
237                self.text.__name__, character))
238
239        letterstr = character
240
241        try:
242            temp = self.__add_quantifier(min, max, **kwargs)
243        except Exception as e:
244            raise
245
246        self.__regex_data += letterstr
247        self.__regex_data += temp
248
249        return self

Text function simply adds the input to regex syntax.

RegexGen.text("This is a text.")

The regex is displayed as:

 regex = "This is a text."

Text is generated using Characters function. character : str // A character can be a word, alphabet, a digit or number and symbols or a range min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 capture : bool => default = False //On True enclose the character in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen

def any( self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> typing_extensions.Self: View Source

251    def any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:
252        ''' 
253        <p><code>Any</code> function generates a regex which can be utilized to check if a certain character exists in the expression<br>
254            In the function definition, 
255            <code>any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self:</code>
256            ,
257            <ol class="list-group list-group-numbered">
258                <li class="list-group-item d-flex justify-content-between align-items-start">
259                    <div class="ms-2 me-auto">
260                        the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br>
261                    </div></li>
262                <li class="list-group-item d-flex justify-content-between align-items-start">
263                    <div class="ms-2 me-auto">
264                        If on capture : bool (default=False) True is passed, it  enclose . in parenthesis so that regex engine capture data. <br>
265                    </div></li>
266                <li class="list-group-item d-flex justify-content-between align-items-start">
267                    <div class="ms-2 me-auto">
268                        The kwargs : dict accepts {<br>
269                            zeroormore : bool  (default=False), <br>
270                            oneormore : bool  (default=False)            
271                    </div></li>
272                    <li class="list-group-item d-flex justify-content-between align-items-start">
273                        <div class="ms-2 me-auto">
274                            The return returns RegexGen
275                        </div></li></ol><br></p>
276        <pre><code>regex = RegexGen()
277            regex = regex.any(min=0, max=12)</code></pre>           
278        
279        <p>The regex is displayed as:</p>
280        <pre>  regex = ".{0,12}"</pre> 
281        . character symbolizes any character
282        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
283        max : int => default = 0
284        capture : bool => default=False //On True enclose . in parenthesis so that regex engine capture data
285        kwargs : dict => {
286            zeroormore : bool => default=False,
287            oneormore : bool => default=False
288        }
289        return : RegexGen
290    '''
291        anystr: str = str()
292        temp: str = str()
293
294        try:
295            temp = self.__add_quantifier(min, max, **kwargs)
296        except (...):
297            raise
298
299        anystr = f"(.{temp})" if capture else f".{temp}"
300        self.__regex_data += anystr
301
302        return self

Any function generates a regex which can be utilized to check if a certain character exists in the expression
In the function definition, any(self, min: int = 0, max: int = 0, capture: bool = False, **kwargs) -> Self: ,

the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0).
If on capture : bool (default=False) True is passed, it enclose . in parenthesis so that regex engine capture data.
The kwargs : dict accepts {
zeroormore : bool (default=False),
oneormore : bool (default=False)
The return returns RegexGen

regex = RegexGen()
    regex = regex.any(min=0, max=12)

The regex is displayed as:

  regex = ".{0,12}"

. character symbolizes any character min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 capture : bool => default=False //On True enclose . in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen

def digits( self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> typing_extensions.Self: View Source

305    def digits(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self:
306        '''
307        This function is used to match only numbers that may not contain a sequence of number or the each numbers existing independently.
308        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
309        max : int => default = 0
310        pattern : a tuple[str, bool] expected a return type from exclude static function
311        capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data
312        kwargs : dict => {
313            zeroormore : bool => default=False,
314            oneormore : bool => default=False
315        }
316        return : RegexGen
317    '''
318        digitstr: str = str()
319        temp: str = str()
320
321        try:
322            temp = self.__add_quantifier(min, max, **kwargs)
323        except (...):
324            raise
325
326        if pattern is None:
327            digitstr = f"(\d{temp})" if capture else f"\d{temp}"
328        elif pattern[1]:
329            digitstr = f"((?!{pattern[0]})\d){temp}" if capture else f"(?:(?!{pattern[0]})\d){temp}"
330        else:
331            digitstr = f"((?![{pattern[0]}])\d){temp}" if capture else f"(?:(?![{pattern[0]}])\d){temp}"
332
333        self.__regex_data += digitstr
334
335        return self

This function matches digits only, optionally excluding either a given sequence of digits or each of the given digits individually. min : int => default = 0 // if min and max are both zero, a keyword argument must be passed as True. max : int => default = 0. pattern : a tuple[str, bool], expected to be the return value of the exclude static function. capture : bool => default=False // on True, the regex syntax is enclosed in parentheses so that the regex engine captures data. kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False }. return : RegexGen

def alphabets( self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> typing_extensions.Self: View Source

337    def alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) -> Self:
338        '''
339        <p><code>Alphabets</code> function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.<br>
340                </p>
341        In the function definition, 
342        <code>alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs)</code>
343        , <br><br>
344        <ol class="list-group list-group-numbered">
345        <li class="list-group-item d-flex justify-content-between align-items-start">
346            <div class="ms-2 me-auto">
347                the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0). <br>
348            </div></li>
349        <li class="list-group-item d-flex justify-content-between align-items-start">
350            <div class="ms-2 me-auto">
351                In pattern, a tuple[str, bool] is expected as a return type from exclude static function. <br>
352            </div></li>
353        <li class="list-group-item d-flex justify-content-between align-items-start">
354            <div class="ms-2 me-auto">
355                If on capture : bool (default=False), True is passed, it  encloses the regex syntax in parenthesis so that regex engine captures data.<br>
356            </div></li>
357        <li class="list-group-item d-flex justify-content-between align-items-start">
358            <div class="ms-2 me-auto">
359                The kwargs : dict accepts {<br>
360                    zeroormore : bool  (default=False), <br>
361                    oneormore : bool  (default=False)            
362            </div></li>
363            <li class="list-group-item d-flex justify-content-between align-items-start">
364                <div class="ms-2 me-auto">
365                    The return returns RegexGen
366                </div></li></ol><br>
367        <pre><code><p>regex = RegexGen().alphabets(1,5)</p></code></pre>           
368        <p>The regex is displayed as:</p>
369        <pre> regex = "[a-zA-Z]{1,5}"</pre>
370        This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently.
371        min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True 
372        max : int => default = 0
373        pattern : a tuple[str, bool] expected a return type from exclude static function
374        capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data
375        kwargs : dict => {
376            zeroormore : bool => default=False,
377            oneormore : bool => default=False
378        }
379        return : RegexGen
380    '''
381        characterstr: str = str()
382        temp: str = str()
383
384        try:
385            temp = self.__add_quantifier(min, max, **kwargs)
386        except (...):
387            raise
388
389        if pattern is None:
390            characterstr = f"([a-zA-Z]{temp})" if capture else f"[a-zA-Z]{temp}"
391        elif pattern[1]:
392            characterstr = f"((?!{pattern[0]})a-zA-Z){temp}" if capture else f"(?:(?!{pattern[0]})a-zA-Z){temp}"
393        else:
394            characterstr = f"((?![{pattern[0]}])a-zA-Z){temp}\b" if capture else f"(?:(?![{pattern[0]}])a-zA-Z){temp}"
395
396        self.__regex_data += characterstr
397
398        return self

Alphabets function matches only words(not numbers) that may not contain a sequence of letters or each of the letters exist independently.

In the function definition, alphabets(self, min: int = 0, max: int = 0, pattern: Tuple[str, bool] = None, capture: bool = False, **kwargs) ,

the min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True max : int (default = 0).
In pattern, a tuple[str, bool] is expected as a return type from exclude static function.
If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that regex engine captures data.
The kwargs : dict accepts {
zeroormore : bool (default=False),
oneormore : bool (default=False)
The return returns RegexGen

regex = RegexGen().alphabets(1,5)

The regex is displayed as:

 regex = "[a-zA-Z]{1,5}"

This function is used to match only words(not numbers) that may not contain a sequence of letters or the each letters existing independently. min : int => default = 0 // if min and max are both zero it must pass a keyword argument as True max : int => default = 0 pattern : a tuple[str, bool] expected a return type from exclude static function capture : bool => default=False //On True enclose the regex syntax in parenthesis so that regex engine capture data kwargs : dict => { zeroormore : bool => default=False, oneormore : bool => default=False } return : RegexGen

def get_non_capturing_regex(self) -> str: View Source

400    def get_non_capturing_regex(self) -> str:
401        '''
402        If the program have capture parameters it will prevent regex engine from capturing index and patterns from the string 
403        reducing capturing overhead and hence increase efficiency
404        return : str 
405    '''
406        return f"(?:{self.__regex_data})"

Returns the whole regex wrapped in a non-capturing group, (?:...), so that the regex engine does not capture the expression as a group, which reduces capturing overhead. return : str

def get_regex_data(self) -> str: View Source

408    def get_regex_data(self) -> str:
409        '''
410        Returns a regex syntax that may capture the text from the input string. 
411        return : str
412    '''
413        return self.__regex_data

Returns a regex syntax that may capture the text from the input string. return : str

def combine(self, regex: typing_extensions.Self) -> typing_extensions.Self: View Source

415    def combine(self, regex: Self) -> Self:
416        '''
417        <p><code>Combine</code> function creates a regex syntax to combine two regex expressions in one to create a pattern.<br>
418                </p>
419        In the function definition, 
420        <code>combine(self, regex: Self)</code>
421        , the function accepts value of two different regex to perform the combination operation. <br>
422        <pre><code><p>regexa = RegexGen().digits(4,4).text(RegexGen.characters('-'))
423            regexb = RegexGen().digits(3,3)
424            regex = RegexGen.combine(regexa, regexb) </p></code></pre>           
425        
426        <p>The regex is displayed as:</p>
427        <pre> regex = "\d{4,4}-\d{3,3}"</pre>
428        regex : RegexGen //Object that has regex syntax which is addable
429        return : RegexGen 
430    '''
431        if len(regex.__regex_data) == 0:  # and regex.__regex_data[0] == '^':
432            raise Exception("Invalid regex to combine")
433
434        self.__regex_data += regex.__regex_data
435
436        return self

Combine function creates a regex syntax to combine two regex expressions in one to create a pattern.

In the function definition, combine(self, regex: Self) , the function accepts value of two different regex to perform the combination operation.

regexa = RegexGen().digits(4,4).text(RegexGen.characters('-'))
    regexb = RegexGen().digits(3,3)
    regex = RegexGen.combine(regexa, regexb)

The regex is displayed as:

 regex = "\d{4}-\d{3}"

regex : RegexGen //Object that has regex syntax which is addable return : RegexGen

@staticmethod

def any_of(characters: str, capture: bool = False, **kwargs) -> str: View Source

438    @staticmethod
439    def any_of(characters: str, capture: bool = False, **kwargs) -> str:
440        '''
441        <p><code>Any of</code> function is any_of_the_block with quantifiers or simply put, this function defines repetition of words in the list.</p> <br>
442        In the function definition, <code>any_of(characters: str, capture: bool = False, **kwargs) -> str:</code>,
443        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
444        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
445        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
446        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
447        <li>The return returns RegexGen.</li></ol>
448
449        '''
450        if valid_ranges(characters, is_number, is_lower_case, is_upper_case) or characters.find(RegexGen.symbolsrange) != -1:
451            pass
452
453        for character in characters:
454            if not type("a").isascii(character):
455                raise Exception("In function {}, character : {} => Non ascii character is not acceptable".format(
456                    RegexGen.any_of.__name__, character))
457        return f"([{characters}])" if capture else f"(?:[{characters}])"

Any of function is any_of_the_block with quantifiers or simply put, this function defines repetition of words in the list.

In the function definition, any_of(characters: str, capture: bool = False, **kwargs) -> str:,

The characters : str are the characters placed inside the character class; every character must be an ASCII character.
If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data; otherwise a non-capturing group is used.
The kwargs : dict is accepted but is not used by this function.
The return returns a str of the form ([characters]) or (?:[characters]), not a RegexGen.

@staticmethod

def characters(char: str) -> str: View Source

459    @staticmethod
460    def characters(char: str) -> str:
461        '''
462        <p><code>Characters</code> function is a static function which is unable to create a regex syntax.<br>
463        Instead, a function like Text is used to submit to the regex syntax. <br>
464        Characters is used some characters predefined in the regex library are used and thus they need to be escaped.
465        <pre><code><p>RegexGen.text(RegexGen.characters("This+is{a$text.") </p></code></pre>           
466                        
467                        <p>The regex is displayed as:</p>
468                        <samp> regex = "This\+is\{a\$text\."</samp><br>
469        some characters are predefined in the regex library thus they need to be escaped
470        return : str 
471        </p>
472    '''
473        letters: str = str()
474        if not char:
475            raise Exception("In function {}, character : {} => Input cannot be none ".format(
476                RegexGen.character.__name__, char))
477
478        predefined_symbols: set = {
479            '\\', '.', '(', ')', '*', '{', '}', '^', '+', '?', '[', ']', '$', '|'}
480
481        for lettr in char:
482            if lettr in predefined_symbols:
483                letters += f"\\{lettr}"
484            else:
485                letters += lettr
486        return letters

The Characters function is a static function that does not add to the regex syntax by itself.
Instead, its result is passed to a function like Text, which submits it to the regex syntax.
Characters is used when the text contains characters that are predefined in the regex library and therefore need to be escaped.

RegexGen.text(RegexGen.characters("This+is{a$text."))

The regex is displayed as:

regex = "This\+is\{a\$text\."
Some characters are predefined in the regex library, so they need to be escaped. Returns: str

def succeeded_by( self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> typing_extensions.Self: View Source

488    def succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
489        '''
490        This function is used to match the pattern succeeded by another pattern.<br>
491        In the function definition, <code>succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>,
492        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
493        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
494        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
495        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
496        <li>The return returns RegexGen.</li></ol>
497    '''
498        if not preceeding or len(preceeding) != 2:
499            raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format(
500                RegexGen.succeeded_by.__name__))
501        if not succeeding or len(succeeding) != 2:
502            raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format(
503                RegexGen.succeeded_by.__name__))
504
505        characterstr: str = str()
506        temp: str = str()
507
508        try:
509            temp = self.__add_quantifier(min, max, **kwargs)
510        except (...):
511            raise
512
513        followblock: str = str()
514        if invert:
515            followblock = f"(?!{succeeding[0]})" if succeeding[1] else f"(?![{succeeding[0]}])"
516        else:
517            followblock = f"(?={succeeding[0]})" if succeeding[1] else f"(?=[{succeeding[0]}])"
518
519        precedingblock: str = f"{preceeding[0]}{temp}" if preceeding[1] else f"[{preceeding[0]}]{temp}"
520
521        if len(self.__regex_data) > len(precedingblock) and \
522                self.__regex_data.rindex(precedingblock) == len(self.__regex_data)-len(precedingblock)-1:
523            characterstr += followblock
524            self.__regex_data = self.__regex_data[:-1]
525            characterstr += ')'
526        else:
527            characterstr = precedingblock + followblock
528            characterstr = f"({characterstr})" if capture else f"(?:{characterstr})"
529        self.__regex_data += characterstr
530        return self

This function is used to match the pattern succeeded by another pattern.
In the function definition, succeeded_by(self, preceeding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:,

The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .
In pattern, a tuple[str, bool] is expected as a return type from exclude static function.
If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.
The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False) }.
The function returns the RegexGen instance.

def preceded_by( self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> typing_extensions.Self: View Source

532    def preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:
533        '''
534        This function is used to match pattern that is preceded by another pattern.<br>
535        If the pattern of the succeeded_by and preceeded_by matches the combination is union. <br>
536      In the function definition, <code>preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:</code>,
537        <ol><li>The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .</li>
538        <li>In pattern, a tuple[str, bool] is expected as a return type from exclude static function.</li>
539        <li>If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.</li>
540        <li>The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False).</li>
541        <li>The return returns RegexGen.</li></ol>
542    '''
543        if not preceding or len(preceding) != 2:
544            raise Exception("In function {} => characters1 tuple cannot be none or its length must be 2".format(
545                RegexGen.preceded_by.__name__))
546        if not succeeding or len(succeeding) != 2:
547            raise Exception("In function {} => characters2 tuple cannot be none or its length must be 2".format(
548                RegexGen.preceded_by.__name__))
549
550        characterstr: str = str()
551        temp: str = str()
552
553        try:
554            temp = self.__add_quantifier(min, max, **kwargs)
555        except (...):
556            raise
557
558        preceedingblock: str = str()
559        if invert:
560            preceedingblock = f"(?<!{preceding[0]})" if preceding[1] else f"(?<![{preceding[0]}])"
561        else:
562            preceedingblock = f"(?<={preceding[0]})" if preceding[1] else f"(?<=[{preceding[0]}])"
563
564        followblock: str = f"{succeeding[0]}{temp}" if succeeding[1] else f"[{succeeding[0]}]{temp}"
565        characterstr = preceedingblock + followblock
566        characterstr = f"({characterstr})" if capture else f"(?:{characterstr})"
567        self.__regex_data += characterstr
568        return self

This function is used to match pattern that is preceded by another pattern.
If the patterns of succeeded_by and preceded_by match, the combination is a union.
In the function definition, preceded_by(self, preceding: Tuple[str, bool], succeeding: Tuple[str, bool], min: int = 0, max: int = 0, capture: bool = False, invert: bool = False, **kwargs) -> Self:,

The min : int and max : int has default = 0. If min and max are both zero it must pass a keyword argument as True .
In pattern, a tuple[str, bool] is expected as a return type from exclude static function.
If on capture : bool (default=False), True is passed, it encloses the regex syntax in parenthesis so that the regex engine captures data.
The kwargs : dict accepts { zeroormore : bool (default=False), oneormore : bool (default=False) }.
The function returns the RegexGen instance.