# Source code for matx.runtime.regex

# Copyright 2022 ByteDance Ltd. and/or its affiliates.
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from .. import _ffi
from .object import Object
from . import _ffi_api
from .object_generic import ObjectTypes, to_runtime_object

from typing import Union


@_ffi.register_object("Regex")
class Regex(Object):
    """Regular expression class implemented using pcre.

    Args:
        pattern (str): Regular expression pattern.
        ignore_case (bool): Perform case-insensitive matching. The default is False.
        dotall (bool): "." matches any character at all, including the newline. The default is False.
        extended (bool): Most white space in the pattern (other than in a character class), and characters between a # outside a character class and the next newline, inclusive, are ignored. An escaping backslash can be used to include a white space or # character as part of the pattern. The default is False.
        anchored (bool): Matches only at the beginning of the subject. The default is False.
        ucp (bool): Sequences such as "\\d" and "\\w" use Unicode properties to determine character types, instead of recognizing only characters with codes less than 128 via a lookup table. The default is True.

    Examples:
        >>> import matx
        >>> regex = matx.Regex("(?<first>.*) are (?<second>.*?) .*")
        >>> regex
        Object(0x55c11322a200)
    """

    # Setting __hash__ to None makes instances unhashable, so they cannot be
    # used as dict keys or set members.
    __hash__ = None

    def __init__(
            self,
            pattern,
            ignore_case: bool = False,
            dotall: bool = False,
            extended: bool = False,
            anchored: bool = False,
            ucp: bool = True) -> None:
        """Build the underlying runtime regex object from the pattern and flags.

        The pattern is converted with ``to_runtime_object`` before being
        handed to the ``_ffi_api.Regex`` constructor; the flags are forwarded
        unchanged, in the order declared here. See the class docstring for
        the meaning of each argument.
        """
        self.__init_handle_by_constructor__(
            _ffi_api.Regex,
            to_runtime_object(pattern),
            ignore_case,
            dotall,
            extended,
            anchored,
            ucp)

    def __repr__(self):
        """Return the representation produced by ``_ffi_api.RTValue_Repr``."""
        return _ffi_api.RTValue_Repr(self)

    def split(self, string: Union[str, bytes]):
        """Split a string by the occurrences of a pattern.

        Args:
            string (str|bytes): The source string.

        Returns:
            List[str|bytes]: A list containing the resulting substrings. If no match was found, a list containing only the source string is returned, i.e. [input].

        Examples:
            >>> import matx
            >>> regex = matx.Regex("name")
            >>> tokens = regex.split("mynameisHE")
            >>> tokens
            ['my', 'isHE']
        """
        return _ffi_api.RegexSplit(self, string)

    def replace(self, string: Union[str, bytes], repl: Union[str, bytes]):
        """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the input string by the replacement repl.

        Args:
            string (str|bytes): The source string.
            repl (str|bytes): The replacement string.

        Returns:
            str|bytes: The replaced string. If no match was found, the source string is returned.

        Examples:
            >>> import matx
            >>> regex = matx.Regex("name")
            >>> new_str = regex.replace("mynameisHE", "NAME")
            >>> new_str
            'myNAMEisHE'
        """
        return _ffi_api.RegexReplace(self, string, repl)

    def match(self, string: Union[str, bytes], offset: int = 0):
        """Try to apply the pattern at the start of the string, returning a tuple containing the matched string. If the pattern contains groups, the text of every group is returned as well.

        Args:
            string (str|bytes): The source string.
            offset (int): Offset in the subject at which to start matching. The default is 0.

        Returns:
            Tuple(List, Dict): The matched groups. The first element in the tuple is the indexed groups. The second element in the tuple is the named groups.

        Examples:
            >>> import matx
            >>> regex = matx.Regex("(?<first>.*) are (?<second>.*?) .*")
            >>> matched_result = regex.match("Cats are smarter than dogs")
            >>> matched_result[0]
            ['Cats are smarter than dogs', 'Cats', 'smarter']
            >>> matched_result[1]
            {'first': 'Cats', 'second': 'smarter'}
        """
        return _ffi_api.RegexMatch(self, string, offset)