Module PdmContext.utils.mapping_functions

Expand source code
class map_base():
    def __init__(self):
        self.existing_results = {}
        self.existing_timestamps = {}
        self.last_occurance = {}

    def find_pos(self, name, occurrences, target_series):
        if name not in self.existing_results.keys():
            self.existing_results[name] = []
            self.existing_timestamps[name] = []
            self.last_occurance[name] = None
            pos_occ = 0
            pos_target = 0
        else:
            pos_occ = len(occurrences)
            for i in range(len(occurrences) - 1, -1, -1):
                pos_occ = i
                if occurrences[i][1] <= self.last_occurance[name]:
                    pos_occ += 1
                    break
            pos_target = len(target_series)
            for i in range(len(target_series) - 1, -1, -1):
                pos_target = i
                if target_series[i][1] <= self.existing_timestamps[name][-1]:
                    pos_target += 1
                    break
        self.last_occurance[name] = occurrences[-1][1]
        return pos_target, pos_occ

    def trim(self, name, target_series):
        self.existing_timestamps[name] = self.existing_timestamps[name][-len(target_series):]
        self.existing_results[name] = self.existing_results[name][-len(target_series):]


class map_categorical_to_continuous:
    """
    Wrapper Class for mapping categorical to continuous time-series.

    """

    def __init__(self):
        pass

    def map(self, target_series, occurrences, name):
        """
        This method is used to generate context time series of categorical type.

        The way we do this is by generating a time series of each different category, by creating a zero and one series,
        similar to "isolated" type, by filling ones in the timestamps that each category appears. Finally, we create an
        additional series with the name state_{name}, having zeros until the occurrence of the last category, and filled
        with ones afterward.

        **Parameters**:

        **target_series**: Used to align sample rate.

        **occurrences**: a list of tuple with timestamps and categorical value, refering to the observed value of a
        categorical source.

        **name**: name of the categorical source.


        **return**: A list of time-series to populate CD part of the context.
        """
        vector = [[0] for i in range(len(target_series))]
        pos = 0
        unique_categories = set([occ[0] for occ in occurrences])
        # this is to aling the series in case of different sample Rate
        for i in range(len(target_series)):
            timestamp = target_series[i][1]
            current_pos = pos
            for q in range(pos, len(occurrences)):
                if occurrences[q][1] > timestamp:
                    current_pos = q
                    break
            # no data found

            if current_pos == pos:
                # if no data in betwwen values use the previus value
                if i > 0:
                    vector[i] = [occurrences[-1][0]]
                # if no data until i timestamp use the first occurence as value
                else:
                    vector[i] = [occurrences[0][0]]
            # if multiple values in between two timestamps use the last as value
            else:
                dataInBetween = [value for value, time in occurrences[pos:current_pos]]
                vector[i] = [v for v in set(dataInBetween)]
            # if no other occurrences just repeat the last value
            if current_pos == len(occurrences):
                for k in range(i + 1, len(vector)):
                    vector[k] = [occurrences[-1][0]]
                break
            pos = current_pos
        all_vectors = []
        all_names = []
        # one-hot encoding of unique categories
        for value in unique_categories:
            in_vector = [1 if value in v else 0 for v in vector]
            if len(set(in_vector)) == 1:
                in_vector[0] = 0
            all_vectors.append(in_vector)
            all_names.append(f"{value}_{name}")
        # create of the state variable
        state_vector = [0 for i in range(len(target_series))]
        lastv = occurrences[-1][0]
        ## not stable
        for i in range(len(state_vector) - 1, -1, -1):
            if lastv in vector[i]:
                state_vector[i] = 1
            else:
                break
        all_vectors.append(state_vector)
        all_names.append(f"state_{name}")
        return all_vectors, all_names


class map_configuration_to_continuous(map_base):
    """
        Wrapper Class for mapping configuration events (defined as events with constant impact) to continuous time-series.

    """

    def __init__(self):
        super().__init__()
        self.existing_results = {}
        self.base = {}

    def map(self, target_series, occurrences, name):
        """
         Configuration events, refers to configuration changes or events that alter the state of the monitored asset.
          To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of
          such an event, we add 1 to all the positions after the occurrence's timestamp

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided Configuration source, to populate CD part of the context.
        """
        if name not in self.base.keys():
            self.base[name] = 0
        pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

        for i in range(pos_target, len(target_series)):
            if pos_occ < len(occurrences) and occurrences[pos_occ][1] <= target_series[i][1]:
                self.base[name] += 1
                self.base[name] = self.base[name] % (len(target_series) + 1)
                self.existing_results[name].append(self.base[name])
                self.existing_timestamps[name].append(target_series[i][1])
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[pos_occ][1] <= target_series[i][1] and pos_occ < len(occurrences) - 1:
                    pos_occ += 1
                if occurrences[pos_occ][1] <= target_series[i][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    for innert1 in range(i + 1, len(target_series)):
                        self.existing_results[name].append(self.base[name])
                        self.existing_timestamps[name].append(target_series[innert1][1])
                    break
            else:
                self.existing_results[name].append(self.base[name])
                self.existing_timestamps[name].append(target_series[i][1])
        self.trim(name, target_series)

        return [self.existing_results[name]], [name]


class map_configuration_to_continuous_deprecated:
    """
        Wrapper Class for mapping configuration events (defined as events with constant impact) to continuous time-series.

    """

    def __init__(self):
        pass

    def map(self, target_series, occurrences, name):
        """
         Configuration events, refers to configuration changes or events that alter the state of the monitored asset.
          To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of
          such an event, we add 1 to all the positions after the occurrence's timestamp

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided Configuration source, to populate CD part of the context.
        """
        vector = [0 for i in range(len(target_series))]
        ci = 0
        base = 0
        for ti in range(len(target_series)):
            if occurrences[ci][1] <= target_series[ti][1]:
                base += 1
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[ci][1] <= target_series[ti][1] and ci < len(occurrences) - 1:
                    ci += 1
                if occurrences[ci][1] <= target_series[ti][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    for innert1 in range(ti, len(target_series)):
                        vector[innert1] = base
                    break
            vector[ti] = base
        ## not stable
        if len(set(vector)) == 1:
            vector[0] = 0
        return [vector], [name]


class map_isolated_to_continuous(map_base):
    """
    Wrapper Class for mapping isolated events (defined as events with instant impact) to continuous time-series.
    """

    def __init__(self):
        super().__init__()

    def map(self, target_series, occurrences, name):
        """
         Isolated events are discrete events that have an immediate impact on the behavior of the asset.
         To transform such events into a continuous representation, we start with a series of 0s as an initial signal
         and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does
         not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided isolated source, to populate CD part of the context.
        """
        pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

        for i in range(pos_target, len(target_series)):
            if pos_occ < len(occurrences) and occurrences[pos_occ][1] <= target_series[i][1]:
                self.existing_results[name].append(1)
                self.existing_timestamps[name].append(target_series[i][1])
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[pos_occ][1] <= target_series[i][1] and pos_occ < len(occurrences) - 1:
                    pos_occ += 1
                if occurrences[pos_occ][1] <= target_series[i][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    for innert1 in range(i + 1, len(target_series)):
                        self.existing_results[name].append(0)
                        self.existing_timestamps[name].append(target_series[innert1][1])
                    break
            else:
                self.existing_results[name].append(0)
                self.existing_timestamps[name].append(target_series[i][1])
        self.trim(name, target_series)

        return [self.existing_results[name]], [name]


class map_isolated_to_continuous_deprecated:
    """
           Wrapper Class for mapping isolated events (defined as events with instant impact) to continuous time-series.

    """

    def __init__(self):
        pass

    def map(self, target_series, occurrences, name):
        """
         Isolated events are discrete events that have an immediate impact on the behavior of the asset.
         To transform such events into a continuous representation, we start with a series of 0s as an initial signal
         and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does
         not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided isolated source, to populate CD part of the context.
        """
        ci = 0
        vector = [0 for i in range(len(target_series))]
        for ti in range(len(target_series)):
            if occurrences[ci][1] <= target_series[ti][1]:
                vector[ti] = 1
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[ci][1] <= target_series[ti][1] and ci < len(occurrences) - 1:
                    ci += 1
                if occurrences[ci][1] <= target_series[ti][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    break
        return [vector], [name]


class map_univariate_to_continuous_deprecated:
    """
        Wrapper Class for mapping univariate time-series (defined as events with instant impact) to continuous time-series with same frequency as a target time-series.
    """

    def __init__(self):
        self.existing_results = {}

    def map(self, target_series, occurrences, name):
        """
        For continuous data sources, we simply collect the values within the time window.
        Although the time window is the same for all sources, each source may have a different sample rate.
        To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher
        sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: The univariate time series.

        **return**: A time series with same size as target_series, to populate CD part of context.
        """

        if name not in self.existing_results.keys():
            self.existing_results[name] = []
        # position of first timestamp of target series in aggregated data
        spos = -1
        for tup in self.existing_results[name]:
            spos += 1
            if tup[1] >= target_series[0][1]:
                break

        self.existing_results[name] = self.existing_results[name][spos:]

        pos = 0
        if len(self.existing_results[name]) > 0:
            for q in range(len(occurrences)):
                pos = q
                if occurrences[q][1] > self.existing_results[name][-1][1]:
                    break

        vector = [tup[0] for tup in self.existing_results[name]] + [0 for i in range(
            len(target_series) - len(self.existing_results[name]))]

        for i in range(len(self.existing_results[name]), len(target_series)):
            timestamp = target_series[i][1]
            current_pos = pos
            for q in range(pos, len(occurrences)):
                if occurrences[q][1] > timestamp:
                    current_pos = q
                    break
            if i == len(target_series) - 1:
                current_pos = len(occurrences) + 1
            # no data found
            if current_pos == pos:
                # if no data in betwwen values use the previus value
                if i > 0:
                    vector[i] = vector[i - 1]
                # if no data until i timestamp use the first occurence as value
                else:
                    vector[i] = occurrences[0][0]
            # if multiple values in between two timestamps use the mean of them as value
            else:
                dataInBetween = [value for value, time in occurrences[pos:current_pos]]
                vector[i] = sum(dataInBetween) / len(dataInBetween)
            # if no other occurrences just repeat the last value
            if current_pos == len(occurrences):
                for k in range(i + 1, len(vector)):
                    vector[k] = vector[k - 1]
                break
            pos = current_pos
        self.existing_results[name] = [(v, tup[1]) for v, tup in zip(vector, target_series)]
        return [vector], [name]


class map_univariate_to_continuous(map_base):
    """
        Wrapper Class for mapping univariate time-series (defined as events with instant impact) to continuous time-series with same frequency as a target time-series.
    """

    def __init__(self):
        super().__init__()

    def map(self, target_series, occurrences, name):
        """
        For continuous data sources, we simply collect the values within the time window.
        Although the time window is the same for all sources, each source may have a different sample rate.
        To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher
        sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: The univariate time series.

        **return**: A time series with same size as target_series, to populate CD part of context.
        """
        pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

        for i in range(pos_target, len(target_series)):

            current_pos = len(occurrences)
            for q in range(pos_occ, len(occurrences)):
                if occurrences[q][1] > target_series[i][1]:
                    current_pos = q
                    break
            if current_pos == pos_occ:
                if len(self.existing_results[name]) > 0:
                    self.existing_results[name].append(self.existing_results[name][-1])
                # if no data until i timestamp use the first occurence as value
                else:
                    self.existing_results[name].append(occurrences[pos_occ][0])
            # if multiple values in between two timestamps use the mean of them as value
            else:
                if i == len(target_series) - 1:
                    dataInBetween = [value for value, time in occurrences[pos_occ:]]
                    self.existing_results[name].append(sum(dataInBetween) / len(dataInBetween))
                else:
                    dataInBetween = [value for value, time in occurrences[pos_occ:current_pos]]
                    self.existing_results[name].append(sum(dataInBetween) / len(dataInBetween))
            self.existing_timestamps[name].append(target_series[i][1])

            # if no other occurrences just repeat the last value
            if current_pos == len(occurrences):
                for k in range(i + 1, len(target_series)):
                    self.existing_results[name].append(self.existing_results[name][-1])
                    self.existing_timestamps[name].append(target_series[k][1])
                break
            pos_occ = current_pos
        self.trim(name, target_series)

        return [self.existing_results[name]], [name]

Classes

class map_base
Expand source code
class map_base():
    def __init__(self):
        self.existing_results = {}
        self.existing_timestamps = {}
        self.last_occurance = {}

    def find_pos(self, name, occurrences, target_series):
        if name not in self.existing_results.keys():
            self.existing_results[name] = []
            self.existing_timestamps[name] = []
            self.last_occurance[name] = None
            pos_occ = 0
            pos_target = 0
        else:
            pos_occ = len(occurrences)
            for i in range(len(occurrences) - 1, -1, -1):
                pos_occ = i
                if occurrences[i][1] <= self.last_occurance[name]:
                    pos_occ += 1
                    break
            pos_target = len(target_series)
            for i in range(len(target_series) - 1, -1, -1):
                pos_target = i
                if target_series[i][1] <= self.existing_timestamps[name][-1]:
                    pos_target += 1
                    break
        self.last_occurance[name] = occurrences[-1][1]
        return pos_target, pos_occ

    def trim(self, name, target_series):
        self.existing_timestamps[name] = self.existing_timestamps[name][-len(target_series):]
        self.existing_results[name] = self.existing_results[name][-len(target_series):]

Subclasses

Methods

def find_pos(self, name, occurrences, target_series)
Expand source code
def find_pos(self, name, occurrences, target_series):
    if name not in self.existing_results.keys():
        self.existing_results[name] = []
        self.existing_timestamps[name] = []
        self.last_occurance[name] = None
        pos_occ = 0
        pos_target = 0
    else:
        pos_occ = len(occurrences)
        for i in range(len(occurrences) - 1, -1, -1):
            pos_occ = i
            if occurrences[i][1] <= self.last_occurance[name]:
                pos_occ += 1
                break
        pos_target = len(target_series)
        for i in range(len(target_series) - 1, -1, -1):
            pos_target = i
            if target_series[i][1] <= self.existing_timestamps[name][-1]:
                pos_target += 1
                break
    self.last_occurance[name] = occurrences[-1][1]
    return pos_target, pos_occ
def trim(self, name, target_series)
Expand source code
def trim(self, name, target_series):
    self.existing_timestamps[name] = self.existing_timestamps[name][-len(target_series):]
    self.existing_results[name] = self.existing_results[name][-len(target_series):]
class map_categorical_to_continuous

Wrapper Class for mapping categorical to continuous time-series.

Expand source code
class map_categorical_to_continuous:
    """
    Wrapper Class for mapping categorical to continuous time-series.

    """

    def __init__(self):
        pass

    def map(self, target_series, occurrences, name):
        """
        This method is used to generate context time series of categorical type.

        The way we do this is by generating a time series of each different category, by creating a zero and one series,
        similar to "isolated" type, by filling ones in the timestamps that each category appears. Finally, we create an
        additional series with the name state_{name}, having zeros until the occurrence of the last category, and filled
        with ones afterward.

        **Parameters**:

        **target_series**: Used to align sample rate.

        **occurrences**: a list of tuple with timestamps and categorical value, refering to the observed value of a
        categorical source.

        **name**: name of the categorical source.


        **return**: A list of time-series to populate CD part of the context.
        """
        vector = [[0] for i in range(len(target_series))]
        pos = 0
        unique_categories = set([occ[0] for occ in occurrences])
        # this is to aling the series in case of different sample Rate
        for i in range(len(target_series)):
            timestamp = target_series[i][1]
            current_pos = pos
            for q in range(pos, len(occurrences)):
                if occurrences[q][1] > timestamp:
                    current_pos = q
                    break
            # no data found

            if current_pos == pos:
                # if no data in betwwen values use the previus value
                if i > 0:
                    vector[i] = [occurrences[-1][0]]
                # if no data until i timestamp use the first occurence as value
                else:
                    vector[i] = [occurrences[0][0]]
            # if multiple values in between two timestamps use the last as value
            else:
                dataInBetween = [value for value, time in occurrences[pos:current_pos]]
                vector[i] = [v for v in set(dataInBetween)]
            # if no other occurrences just repeat the last value
            if current_pos == len(occurrences):
                for k in range(i + 1, len(vector)):
                    vector[k] = [occurrences[-1][0]]
                break
            pos = current_pos
        all_vectors = []
        all_names = []
        # one-hot encoding of unique categories
        for value in unique_categories:
            in_vector = [1 if value in v else 0 for v in vector]
            if len(set(in_vector)) == 1:
                in_vector[0] = 0
            all_vectors.append(in_vector)
            all_names.append(f"{value}_{name}")
        # create of the state variable
        state_vector = [0 for i in range(len(target_series))]
        lastv = occurrences[-1][0]
        ## not stable
        for i in range(len(state_vector) - 1, -1, -1):
            if lastv in vector[i]:
                state_vector[i] = 1
            else:
                break
        all_vectors.append(state_vector)
        all_names.append(f"state_{name}")
        return all_vectors, all_names

Methods

def map(self, target_series, occurrences, name)

This method is used to generate context time series of categorical type.

The way we do this is by generating a time series of each different category, by creating a zero and one series, similar to "isolated" type, by filling ones in the timestamps that each category appears. Finally, we create an additional series with the name state_{name}, having zeros until the occurrence of the last category, and filled with ones afterward.

Parameters:

target_series: Used to align sample rate.

occurrences: a list of tuple with timestamps and categorical value, refering to the observed value of a categorical source.

name: name of the categorical source.

return: A list of time-series to populate CD part of the context.

Expand source code
def map(self, target_series, occurrences, name):
    """
    This method is used to generate context time series of categorical type.

    The way we do this is by generating a time series of each different category, by creating a zero and one series,
    similar to "isolated" type, by filling ones in the timestamps that each category appears. Finally, we create an
    additional series with the name state_{name}, having zeros until the occurrence of the last category, and filled
    with ones afterward.

    **Parameters**:

    **target_series**: Used to align sample rate.

    **occurrences**: a list of tuple with timestamps and categorical value, refering to the observed value of a
    categorical source.

    **name**: name of the categorical source.


    **return**: A list of time-series to populate CD part of the context.
    """
    vector = [[0] for i in range(len(target_series))]
    pos = 0
    unique_categories = set([occ[0] for occ in occurrences])
    # this is to aling the series in case of different sample Rate
    for i in range(len(target_series)):
        timestamp = target_series[i][1]
        current_pos = pos
        for q in range(pos, len(occurrences)):
            if occurrences[q][1] > timestamp:
                current_pos = q
                break
        # no data found

        if current_pos == pos:
            # if no data in betwwen values use the previus value
            if i > 0:
                vector[i] = [occurrences[-1][0]]
            # if no data until i timestamp use the first occurence as value
            else:
                vector[i] = [occurrences[0][0]]
        # if multiple values in between two timestamps use the last as value
        else:
            dataInBetween = [value for value, time in occurrences[pos:current_pos]]
            vector[i] = [v for v in set(dataInBetween)]
        # if no other occurrences just repeat the last value
        if current_pos == len(occurrences):
            for k in range(i + 1, len(vector)):
                vector[k] = [occurrences[-1][0]]
            break
        pos = current_pos
    all_vectors = []
    all_names = []
    # one-hot encoding of unique categories
    for value in unique_categories:
        in_vector = [1 if value in v else 0 for v in vector]
        if len(set(in_vector)) == 1:
            in_vector[0] = 0
        all_vectors.append(in_vector)
        all_names.append(f"{value}_{name}")
    # create of the state variable
    state_vector = [0 for i in range(len(target_series))]
    lastv = occurrences[-1][0]
    ## not stable
    for i in range(len(state_vector) - 1, -1, -1):
        if lastv in vector[i]:
            state_vector[i] = 1
        else:
            break
    all_vectors.append(state_vector)
    all_names.append(f"state_{name}")
    return all_vectors, all_names
class map_configuration_to_continuous

Wrapper Class for mapping configuration events (defined as events with constant impact) to continuous time-series.

Expand source code
class map_configuration_to_continuous(map_base):
    """
        Wrapper Class for mapping configuration events (defined as events with constant impact) to continuous time-series.

    """

    def __init__(self):
        super().__init__()
        self.existing_results = {}
        self.base = {}

    def map(self, target_series, occurrences, name):
        """
         Configuration events, refers to configuration changes or events that alter the state of the monitored asset.
          To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of
          such an event, we add 1 to all the positions after the occurrence's timestamp

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided Configuration source, to populate CD part of the context.
        """
        if name not in self.base.keys():
            self.base[name] = 0
        pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

        for i in range(pos_target, len(target_series)):
            if pos_occ < len(occurrences) and occurrences[pos_occ][1] <= target_series[i][1]:
                self.base[name] += 1
                self.base[name] = self.base[name] % (len(target_series) + 1)
                self.existing_results[name].append(self.base[name])
                self.existing_timestamps[name].append(target_series[i][1])
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[pos_occ][1] <= target_series[i][1] and pos_occ < len(occurrences) - 1:
                    pos_occ += 1
                if occurrences[pos_occ][1] <= target_series[i][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    for innert1 in range(i + 1, len(target_series)):
                        self.existing_results[name].append(self.base[name])
                        self.existing_timestamps[name].append(target_series[innert1][1])
                    break
            else:
                self.existing_results[name].append(self.base[name])
                self.existing_timestamps[name].append(target_series[i][1])
        self.trim(name, target_series)

        return [self.existing_results[name]], [name]

Ancestors

Methods

def map(self, target_series, occurrences, name)

Configuration events, refers to configuration changes or events that alter the state of the monitored asset. To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of such an event, we add 1 to all the positions after the occurrence's timestamp

Parameters:

target_series: Used to align sample rate of the continuous series.

occurrences: Contain time stamps of the occurrences of an isolated type source.

return: A binary time series with same size as target_series, that models the occurrences of the provided Configuration source, to populate CD part of the context.

Expand source code
def map(self, target_series, occurrences, name):
    """
     Configuration events, refers to configuration changes or events that alter the state of the monitored asset.
      To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of
      such an event, we add 1 to all the positions after the occurrence's timestamp

     **Parameters**:

    **target_series**: Used to align sample rate of the continuous series.

    **occurrences**: Contain time stamps of the occurrences of an isolated type source.

    **return**: A binary time series with same size as target_series, that models the occurrences
    of the provided Configuration source, to populate CD part of the context.
    """
    if name not in self.base.keys():
        self.base[name] = 0
    pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

    for i in range(pos_target, len(target_series)):
        if pos_occ < len(occurrences) and occurrences[pos_occ][1] <= target_series[i][1]:
            self.base[name] += 1
            self.base[name] = self.base[name] % (len(target_series) + 1)
            self.existing_results[name].append(self.base[name])
            self.existing_timestamps[name].append(target_series[i][1])
            # finde next occurance after target_series[ti][1] timestamp
            while occurrences[pos_occ][1] <= target_series[i][1] and pos_occ < len(occurrences) - 1:
                pos_occ += 1
            if occurrences[pos_occ][1] <= target_series[i][
                1]:  # no other occurencies after target_series[ti][1] timestamp
                for innert1 in range(i + 1, len(target_series)):
                    self.existing_results[name].append(self.base[name])
                    self.existing_timestamps[name].append(target_series[innert1][1])
                break
        else:
            self.existing_results[name].append(self.base[name])
            self.existing_timestamps[name].append(target_series[i][1])
    self.trim(name, target_series)

    return [self.existing_results[name]], [name]
class map_configuration_to_continuous_deprecated

Wrapper Class for mapping configuration events (defined as events with constant impact) to continuous time-series.

Expand source code
class map_configuration_to_continuous_deprecated:
    """
        Wrapper Class for mapping configuration events (defined as events with constant impact) to continuous time-series.

    """

    def __init__(self):
        pass

    def map(self, target_series, occurrences, name):
        """
         Configuration events, refers to configuration changes or events that alter the state of the monitored asset.
          To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of
          such an event, we add 1 to all the positions after the occurrence's timestamp

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided Configuration source, to populate CD part of the context.
        """
        vector = [0 for i in range(len(target_series))]
        ci = 0
        base = 0
        for ti in range(len(target_series)):
            if occurrences[ci][1] <= target_series[ti][1]:
                base += 1
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[ci][1] <= target_series[ti][1] and ci < len(occurrences) - 1:
                    ci += 1
                if occurrences[ci][1] <= target_series[ti][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    for innert1 in range(ti, len(target_series)):
                        vector[innert1] = base
                    break
            vector[ti] = base
        ## not stable
        if len(set(vector)) == 1:
            vector[0] = 0
        return [vector], [name]

Methods

def map(self, target_series, occurrences, name)

Configuration events, refers to configuration changes or events that alter the state of the monitored asset. To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of such an event, we add 1 to all the positions after the occurrence's timestamp

Parameters:

target_series: Used to align sample rate of the continuous series.

occurrences: Contain time stamps of the occurrences of an isolated type source.

return: A binary time series with same size as target_series, that models the occurrences of the provided Configuration source, to populate CD part of the context.

Expand source code
def map(self, target_series, occurrences, name):
    """
     Configuration events, refers to configuration changes or events that alter the state of the monitored asset.
      To transform these events into continuous signals, we start with a series of 0s, and after each occurrence of
      such an event, we add 1 to all the positions after the occurrence's timestamp

     **Parameters**:

    **target_series**: Used to align sample rate of the continuous series.

    **occurrences**: Contain time stamps of the occurrences of an isolated type source.

    **return**: A binary time series with same size as target_series, that models the occurrences
    of the provided Configuration source, to populate CD part of the context.
    """
    vector = [0 for i in range(len(target_series))]
    ci = 0
    base = 0
    for ti in range(len(target_series)):
        if occurrences[ci][1] <= target_series[ti][1]:
            base += 1
            # finde next occurance after target_series[ti][1] timestamp
            while occurrences[ci][1] <= target_series[ti][1] and ci < len(occurrences) - 1:
                ci += 1
            if occurrences[ci][1] <= target_series[ti][
                1]:  # no other occurencies after target_series[ti][1] timestamp
                for innert1 in range(ti, len(target_series)):
                    vector[innert1] = base
                break
        vector[ti] = base
    ## not stable
    if len(set(vector)) == 1:
        vector[0] = 0
    return [vector], [name]
class map_isolated_to_continuous

Wrapper Class for mapping isolated events (defined as events with instant impact) to continuous time-series.

Expand source code
class map_isolated_to_continuous(map_base):
    """
    Wrapper Class for mapping isolated events (defined as events with instant impact) to continuous time-series.
    """

    def __init__(self):
        super().__init__()

    def map(self, target_series, occurrences, name):
        """
         Isolated events are discrete events that have an immediate impact on the behavior of the asset.
         To transform such events into a continuous representation, we start with a series of 0s as an initial signal
         and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does
         not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided isolated source, to populate CD part of the context.
        """
        pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

        for i in range(pos_target, len(target_series)):
            if pos_occ < len(occurrences) and occurrences[pos_occ][1] <= target_series[i][1]:
                self.existing_results[name].append(1)
                self.existing_timestamps[name].append(target_series[i][1])
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[pos_occ][1] <= target_series[i][1] and pos_occ < len(occurrences) - 1:
                    pos_occ += 1
                if occurrences[pos_occ][1] <= target_series[i][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    for innert1 in range(i + 1, len(target_series)):
                        self.existing_results[name].append(0)
                        self.existing_timestamps[name].append(target_series[innert1][1])
                    break
            else:
                self.existing_results[name].append(0)
                self.existing_timestamps[name].append(target_series[i][1])
        self.trim(name, target_series)

        return [self.existing_results[name]], [name]

Ancestors

Methods

def map(self, target_series, occurrences, name)

Isolated events are discrete events that have an immediate impact on the behavior of the asset. To transform such events into a continuous representation, we start with a series of 0s as an initial signal and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

Parameters:

target_series: Used to align sample rate of the continuous series.

occurrences: Contain time stamps of the occurrences of an isolated type source.

return: A binary time series with same size as target_series, that models the occurrences of the provided isolated source, to populate CD part of the context.

Expand source code
def map(self, target_series, occurrences, name):
    """
     Isolated events are discrete events that have an immediate impact on the behavior of the asset.
     To transform such events into a continuous representation, we start with a series of 0s as an initial signal
     and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does
     not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

     **Parameters**:

    **target_series**: Used to align sample rate of the continuous series.

    **occurrences**: Contain time stamps of the occurrences of an isolated type source.

    **return**: A binary time series with same size as target_series, that models the occurrences
    of the provided isolated source, to populate CD part of the context.
    """
    pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

    for i in range(pos_target, len(target_series)):
        if pos_occ < len(occurrences) and occurrences[pos_occ][1] <= target_series[i][1]:
            self.existing_results[name].append(1)
            self.existing_timestamps[name].append(target_series[i][1])
            # finde next occurance after target_series[ti][1] timestamp
            while occurrences[pos_occ][1] <= target_series[i][1] and pos_occ < len(occurrences) - 1:
                pos_occ += 1
            if occurrences[pos_occ][1] <= target_series[i][
                1]:  # no other occurencies after target_series[ti][1] timestamp
                for innert1 in range(i + 1, len(target_series)):
                    self.existing_results[name].append(0)
                    self.existing_timestamps[name].append(target_series[innert1][1])
                break
        else:
            self.existing_results[name].append(0)
            self.existing_timestamps[name].append(target_series[i][1])
    self.trim(name, target_series)

    return [self.existing_results[name]], [name]
class map_isolated_to_continuous_deprecated

Wrapper Class for mapping isolated events (defined as events with instant impact) to continuous time-series.

Expand source code
class map_isolated_to_continuous_deprecated:
    """
           Wrapper Class for mapping isolated events (defined as events with instant impact) to continuous time-series.

    """

    def __init__(self):
        pass

    def map(self, target_series, occurrences, name):
        """
         Isolated events are discrete events that have an immediate impact on the behavior of the asset.
         To transform such events into a continuous representation, we start with a series of 0s as an initial signal
         and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does
         not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: Contain time stamps of the occurrences of an isolated type source.

        **return**: A binary time series with same size as target_series, that models the occurrences
        of the provided isolated source, to populate CD part of the context.
        """
        ci = 0
        vector = [0 for i in range(len(target_series))]
        for ti in range(len(target_series)):
            if occurrences[ci][1] <= target_series[ti][1]:
                vector[ti] = 1
                # finde next occurance after target_series[ti][1] timestamp
                while occurrences[ci][1] <= target_series[ti][1] and ci < len(occurrences) - 1:
                    ci += 1
                if occurrences[ci][1] <= target_series[ti][
                    1]:  # no other occurencies after target_series[ti][1] timestamp
                    break
        return [vector], [name]

Methods

def map(self, target_series, occurrences, name)

Isolated events are discrete events that have an immediate impact on the behavior of the asset. To transform such events into a continuous representation, we start with a series of 0s as an initial signal and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

Parameters:

target_series: Used to align sample rate of the continuous series.

occurrences: Contain time stamps of the occurrences of an isolated type source.

return: A binary time series with same size as target_series, that models the occurrences of the provided isolated source, to populate CD part of the context.

Expand source code
def map(self, target_series, occurrences, name):
    """
     Isolated events are discrete events that have an immediate impact on the behavior of the asset.
     To transform such events into a continuous representation, we start with a series of 0s as an initial signal
     and assign 1 to the position corresponding to the timestamps of the events. If the event timestamp does
     not match any target_series timestamps, it is mapped to the closest timestamp in target_series.

     **Parameters**:

    **target_series**: Used to align sample rate of the continuous series.

    **occurrences**: Contain time stamps of the occurrences of an isolated type source.

    **return**: A binary time series with same size as target_series, that models the occurrences
    of the provided isolated source, to populate CD part of the context.
    """
    ci = 0
    vector = [0 for i in range(len(target_series))]
    for ti in range(len(target_series)):
        if occurrences[ci][1] <= target_series[ti][1]:
            vector[ti] = 1
            # finde next occurance after target_series[ti][1] timestamp
            while occurrences[ci][1] <= target_series[ti][1] and ci < len(occurrences) - 1:
                ci += 1
            if occurrences[ci][1] <= target_series[ti][
                1]:  # no other occurencies after target_series[ti][1] timestamp
                break
    return [vector], [name]
class map_univariate_to_continuous

Wrapper Class for mapping univariate time-series (defined as events with instant impact) to continuous time-series with same frequency as a target time-series.

Expand source code
class map_univariate_to_continuous(map_base):
    """
        Wrapper Class for mapping univariate time-series (defined as events with instant impact) to continuous time-series with same frequency as a target time-series.
    """

    def __init__(self):
        super().__init__()

    def map(self, target_series, occurrences, name):
        """
        For continuous data sources, we simply collect the values within the time window.
        Although the time window is the same for all sources, each source may have a different sample rate.
        To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher
        sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: The univariate time series.

        **return**: A time series with same size as target_series, to populate CD part of context.
        """
        pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

        for i in range(pos_target, len(target_series)):

            current_pos = len(occurrences)
            for q in range(pos_occ, len(occurrences)):
                if occurrences[q][1] > target_series[i][1]:
                    current_pos = q
                    break
            if current_pos == pos_occ:
                if len(self.existing_results[name]) > 0:
                    self.existing_results[name].append(self.existing_results[name][-1])
                # if no data until i timestamp use the first occurence as value
                else:
                    self.existing_results[name].append(occurrences[pos_occ][0])
            # if multiple values in between two timestamps use the mean of them as value
            else:
                if i == len(target_series) - 1:
                    dataInBetween = [value for value, time in occurrences[pos_occ:]]
                    self.existing_results[name].append(sum(dataInBetween) / len(dataInBetween))
                else:
                    dataInBetween = [value for value, time in occurrences[pos_occ:current_pos]]
                    self.existing_results[name].append(sum(dataInBetween) / len(dataInBetween))
            self.existing_timestamps[name].append(target_series[i][1])

            # if no other occurrences just repeat the last value
            if current_pos == len(occurrences):
                for k in range(i + 1, len(target_series)):
                    self.existing_results[name].append(self.existing_results[name][-1])
                    self.existing_timestamps[name].append(target_series[k][1])
                break
            pos_occ = current_pos
        self.trim(name, target_series)

        return [self.existing_results[name]], [name]

Ancestors

Methods

def map(self, target_series, occurrences, name)

For continuous data sources, we simply collect the values within the time window. Although the time window is the same for all sources, each source may have a different sample rate. To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

Parameters:

target_series: Used to align sample rate of the continuous series.

occurrences: The univariate time series.

return: A time series with same size as target_series, to populate CD part of context.

Expand source code
def map(self, target_series, occurrences, name):
    """
    For continuous data sources, we simply collect the values within the time window.
    Although the time window is the same for all sources, each source may have a different sample rate.
    To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher
    sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

     **Parameters**:

    **target_series**: Used to align sample rate of the continuous series.

    **occurrences**: The univariate time series.

    **return**: A time series with same size as target_series, to populate CD part of context.
    """
    pos_target, pos_occ = self.find_pos(name, occurrences, target_series)

    for i in range(pos_target, len(target_series)):

        current_pos = len(occurrences)
        for q in range(pos_occ, len(occurrences)):
            if occurrences[q][1] > target_series[i][1]:
                current_pos = q
                break
        if current_pos == pos_occ:
            if len(self.existing_results[name]) > 0:
                self.existing_results[name].append(self.existing_results[name][-1])
            # if no data until i timestamp use the first occurence as value
            else:
                self.existing_results[name].append(occurrences[pos_occ][0])
        # if multiple values in between two timestamps use the mean of them as value
        else:
            if i == len(target_series) - 1:
                dataInBetween = [value for value, time in occurrences[pos_occ:]]
                self.existing_results[name].append(sum(dataInBetween) / len(dataInBetween))
            else:
                dataInBetween = [value for value, time in occurrences[pos_occ:current_pos]]
                self.existing_results[name].append(sum(dataInBetween) / len(dataInBetween))
        self.existing_timestamps[name].append(target_series[i][1])

        # if no other occurrences just repeat the last value
        if current_pos == len(occurrences):
            for k in range(i + 1, len(target_series)):
                self.existing_results[name].append(self.existing_results[name][-1])
                self.existing_timestamps[name].append(target_series[k][1])
            break
        pos_occ = current_pos
    self.trim(name, target_series)

    return [self.existing_results[name]], [name]
class map_univariate_to_continuous_deprecated

Wrapper Class for mapping univariate time-series (defined as events with instant impact) to continuous time-series with same frequency as a target time-series.

Expand source code
class map_univariate_to_continuous_deprecated:
    """
        Wrapper Class for mapping univariate time-series (defined as events with instant impact) to continuous time-series with same frequency as a target time-series.
    """

    def __init__(self):
        self.existing_results = {}

    def map(self, target_series, occurrences, name):
        """
        For continuous data sources, we simply collect the values within the time window.
        Although the time window is the same for all sources, each source may have a different sample rate.
        To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher
        sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

         **Parameters**:

        **target_series**: Used to align sample rate of the continuous series.

        **occurrences**: The univariate time series.

        **return**: A time series with same size as target_series, to populate CD part of context.
        """

        if name not in self.existing_results.keys():
            self.existing_results[name] = []
        # position of first timestamp of target series in aggregated data
        spos = -1
        for tup in self.existing_results[name]:
            spos += 1
            if tup[1] >= target_series[0][1]:
                break

        self.existing_results[name] = self.existing_results[name][spos:]

        pos = 0
        if len(self.existing_results[name]) > 0:
            for q in range(len(occurrences)):
                pos = q
                if occurrences[q][1] > self.existing_results[name][-1][1]:
                    break

        vector = [tup[0] for tup in self.existing_results[name]] + [0 for i in range(
            len(target_series) - len(self.existing_results[name]))]

        for i in range(len(self.existing_results[name]), len(target_series)):
            timestamp = target_series[i][1]
            current_pos = pos
            for q in range(pos, len(occurrences)):
                if occurrences[q][1] > timestamp:
                    current_pos = q
                    break
            if i == len(target_series) - 1:
                current_pos = len(occurrences) + 1
            # no data found
            if current_pos == pos:
                # if no data in betwwen values use the previus value
                if i > 0:
                    vector[i] = vector[i - 1]
                # if no data until i timestamp use the first occurence as value
                else:
                    vector[i] = occurrences[0][0]
            # if multiple values in between two timestamps use the mean of them as value
            else:
                dataInBetween = [value for value, time in occurrences[pos:current_pos]]
                vector[i] = sum(dataInBetween) / len(dataInBetween)
            # if no other occurrences just repeat the last value
            if current_pos == len(occurrences):
                for k in range(i + 1, len(vector)):
                    vector[k] = vector[k - 1]
                break
            pos = current_pos
        self.existing_results[name] = [(v, tup[1]) for v, tup in zip(vector, target_series)]
        return [vector], [name]

Methods

def map(self, target_series, occurrences, name)

For continuous data sources, we simply collect the values within the time window. Although the time window is the same for all sources, each source may have a different sample rate. To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

Parameters:

target_series: Used to align sample rate of the continuous series.

occurrences: The univariate time series.

return: A time series with same size as target_series, to populate CD part of context.

Expand source code
def map(self, target_series, occurrences, name):
    """
    For continuous data sources, we simply collect the values within the time window.
    Although the time window is the same for all sources, each source may have a different sample rate.
    To create a signal of the same size as target_series, we perform mean aggregation if a source has a higher
    sample rate than target_series, using the mean value of the data between each timestamp of the target_series.

     **Parameters**:

    **target_series**: Used to align sample rate of the continuous series.

    **occurrences**: The univariate time series.

    **return**: A time series with same size as target_series, to populate CD part of context.
    """

    if name not in self.existing_results.keys():
        self.existing_results[name] = []
    # position of first timestamp of target series in aggregated data
    spos = -1
    for tup in self.existing_results[name]:
        spos += 1
        if tup[1] >= target_series[0][1]:
            break

    self.existing_results[name] = self.existing_results[name][spos:]

    pos = 0
    if len(self.existing_results[name]) > 0:
        for q in range(len(occurrences)):
            pos = q
            if occurrences[q][1] > self.existing_results[name][-1][1]:
                break

    vector = [tup[0] for tup in self.existing_results[name]] + [0 for i in range(
        len(target_series) - len(self.existing_results[name]))]

    for i in range(len(self.existing_results[name]), len(target_series)):
        timestamp = target_series[i][1]
        current_pos = pos
        for q in range(pos, len(occurrences)):
            if occurrences[q][1] > timestamp:
                current_pos = q
                break
        if i == len(target_series) - 1:
            current_pos = len(occurrences) + 1
        # no data found
        if current_pos == pos:
            # if no data in betwwen values use the previus value
            if i > 0:
                vector[i] = vector[i - 1]
            # if no data until i timestamp use the first occurence as value
            else:
                vector[i] = occurrences[0][0]
        # if multiple values in between two timestamps use the mean of them as value
        else:
            dataInBetween = [value for value, time in occurrences[pos:current_pos]]
            vector[i] = sum(dataInBetween) / len(dataInBetween)
        # if no other occurrences just repeat the last value
        if current_pos == len(occurrences):
            for k in range(i + 1, len(vector)):
                vector[k] = vector[k - 1]
            break
        pos = current_pos
    self.existing_results[name] = [(v, tup[1]) for v, tup in zip(vector, target_series)]
    return [vector], [name]