Skip to content

Modules

model

Classes

Modules

sample

Provide a description of a sample.

Classes
Sample

Bases: NamedTuple

Define a sample.

Source code in src/taxpasta/domain/model/sample.py
class Sample(NamedTuple):
    """
    Define a sample.

    A sample is an immutable pair of a name and an abundance profile.

    Attributes:
        name: The name of the sample.
        profile: The sample's abundance profile, a data frame typed with the
            `StandardProfile` schema.

    """

    name: str
    profile: DataFrame[StandardProfile]
Attributes
name: str instance-attribute
profile: DataFrame[StandardProfile] instance-attribute
standard_profile

Provide a description of the standard profile format.

Classes
StandardProfile

Bases: DataFrameModel

Define the standard profile format.

Source code in src/taxpasta/domain/model/standard_profile.py
class StandardProfile(pa.DataFrameModel):
    """
    Define the standard profile format.

    The schema declares two columns: a categorical taxonomy identifier and a
    non-negative integer count.

    """

    # The taxonomy identifier, validated as a pandas categorical column.
    taxonomy_id: Series[pd.CategoricalDtype] = pa.Field()
    # The abundance count; every value must be greater than or equal to zero.
    count: Series[int] = pa.Field(ge=0)

    class Config:
        """Configure the schema model."""

        # Coerce column dtypes to the declared ones during validation.
        coerce = True
        # Require the columns to appear in the order declared above.
        ordered = True
        # Reject data frames that contain columns not declared in the schema.
        strict = True
Attributes
count: Series[int] = pa.Field(ge=0) class-attribute instance-attribute
taxonomy_id: Series[pd.CategoricalDtype] = pa.Field() class-attribute instance-attribute
Classes
Config

Configure the schema model.

Source code in src/taxpasta/domain/model/standard_profile.py
class Config:
    """Configure the schema model."""

    # Coerce column dtypes to the declared ones during validation.
    coerce = True
    # Require the columns to appear in the order declared by the schema model.
    ordered = True
    # Reject data frames that contain columns not declared in the schema.
    strict = True
Attributes
coerce = True class-attribute instance-attribute
ordered = True class-attribute instance-attribute
strict = True class-attribute instance-attribute
tidy_observation_table

Provide a description of a tidy observation table.

Classes
TidyObservationTable

Bases: DataFrameModel

Define the tidy observation table.

Source code in src/taxpasta/domain/model/tidy_observation_table.py
class TidyObservationTable(pa.DataFrameModel):
    """
    Define the tidy observation table.

    Required columns are the taxonomy identifier, the count, and the sample. The
    columns annotated as `Optional` may be absent from a valid table.

    """

    # Required: the taxonomy identifier (categorical).
    taxonomy_id: Series[pd.CategoricalDtype] = pa.Field()
    # Optional: the taxon name (categorical).
    name: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon rank (categorical).
    rank: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon lineage (categorical); presumably expressed as names,
    # by analogy with `id_lineage` and `rank_lineage` -- TODO confirm.
    lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon lineage as identifiers (categorical).
    id_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon lineage as ranks (categorical).
    rank_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Required: the abundance count as 64-bit integers, each at least zero.
    count: Series[np.int64] = pa.Field(ge=0)
    # Required: the sample that the observation belongs to (categorical).
    sample: Series[pd.CategoricalDtype] = pa.Field()

    class Config:
        """Configure the schema model."""

        # Coerce column dtypes to the declared ones during validation.
        coerce = True
        # Require the columns to appear in the order declared above.
        ordered = True
        # Reject data frames that contain columns not declared in the schema.
        strict = True
Attributes
count: Series[np.int64] = pa.Field(ge=0) class-attribute instance-attribute
id_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
name: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
rank: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
rank_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
sample: Series[pd.CategoricalDtype] = pa.Field() class-attribute instance-attribute
taxonomy_id: Series[pd.CategoricalDtype] = pa.Field() class-attribute instance-attribute
Classes
Config

Configure the schema model.

Source code in src/taxpasta/domain/model/tidy_observation_table.py
class Config:
    """Configure the schema model."""

    # Coerce column dtypes to the declared ones during validation.
    coerce = True
    # Require the columns to appear in the order declared by the schema model.
    ordered = True
    # Reject data frames that contain columns not declared in the schema.
    strict = True
Attributes
coerce = True class-attribute instance-attribute
ordered = True class-attribute instance-attribute
strict = True class-attribute instance-attribute
wide_observation_table

Provide a description of an observation matrix.

Classes
WideObservationTable

Bases: DataFrameModel

Define the observation matrix.

Source code in src/taxpasta/domain/model/wide_observation_table.py
class WideObservationTable(pa.DataFrameModel):
    """
    Define the observation matrix.

    The taxonomy identifier column is required and the remaining named taxonomy
    columns are optional. Every column whose name is not one of the named columns
    is matched by a regular expression and validated as non-negative 64-bit
    integers.

    """

    # Required: the taxonomy identifier (categorical).
    taxonomy_id: Series[pd.CategoricalDtype] = pa.Field()
    # Optional: the taxon name (categorical).
    name: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon rank (categorical).
    rank: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon lineage (categorical); presumably expressed as names,
    # by analogy with `id_lineage` and `rank_lineage` -- TODO confirm.
    lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon lineage as identifiers (categorical).
    id_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # Optional: the taxon lineage as ranks (categorical).
    rank_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field()
    # This field uses a regex to match all columns that are not one of the above.
    # The negative lookahead excludes a name only when it equals one of the listed
    # names in full (note the `$`), so e.g. a column called `name_1` still matches.
    any_samples: Series[np.int64] = pa.Field(
        ge=0,
        alias="^(?!(taxonomy_id|name|rank|lineage|id_lineage|rank_lineage)$).*",
        regex=True,
    )

    class Config:
        """Configure the schema model."""

        # Coerce column dtypes to the declared ones during validation.
        coerce = True
        # Require the columns to appear in the order declared above.
        ordered = True
Attributes
any_samples: Series[np.int64] = pa.Field(ge=0, alias='^(?!(taxonomy_id|name|rank|lineage|id_lineage|rank_lineage)$).*', regex=True) class-attribute instance-attribute
id_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
name: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
rank: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
rank_lineage: Optional[Series[pd.CategoricalDtype]] = pa.Field() class-attribute instance-attribute
taxonomy_id: Series[pd.CategoricalDtype] = pa.Field() class-attribute instance-attribute
Classes
Config

Configure the schema model.

Source code in src/taxpasta/domain/model/wide_observation_table.py
class Config:
    """Configure the schema model."""

    # Coerce column dtypes to the declared ones during validation.
    coerce = True
    # Require the columns to appear in the order declared by the schema model.
    ordered = True
Attributes
coerce = True class-attribute instance-attribute
ordered = True class-attribute instance-attribute

service

Attributes

Classes

Modules

consensus_builder

Provide a builder service for creating a consensus between many profiles.

Classes
ConsensusBuilder

Bases: ABC

Define a builder service for creating a consensus between many profiles.

Source code in src/taxpasta/domain/service/consensus_builder.py
class ConsensusBuilder(ABC):
    """
    Define a builder service for creating a consensus between many profiles.

    Concrete builders receive samples one at a time through `add_sample` and
    produce the combined result through `build`.

    """

    @abstractmethod
    def add_sample(self, sample: Sample) -> None:
        """
        Add a sample to the consensus builder.

        Args:
            sample: The sample (name and profile) to include in the consensus.

        """

    @abstractmethod
    def build(self) -> pd.DataFrame:
        """
        Build the consensus between all added profiles.

        Returns:
            A pandas data frame holding the consensus. Its layout is determined by
            the concrete implementation.

        """
Functions
add_sample(sample: Sample) -> None abstractmethod

Add a sample to the consensus builder.

Source code in src/taxpasta/domain/service/consensus_builder.py
@abstractmethod
def add_sample(self, sample: Sample) -> None:
    """
    Add a sample to the consensus builder.

    Args:
        sample: The sample (name and profile) to include in the consensus.

    """
build() -> pd.DataFrame abstractmethod

Build the consensus between all added profiles.

Source code in src/taxpasta/domain/service/consensus_builder.py
@abstractmethod
def build(self) -> pd.DataFrame:
    """
    Build the consensus between all added profiles.

    Returns:
        A pandas data frame holding the consensus. Its layout is determined by
        the concrete implementation.

    """
sample_merging_service

Provide a sample merging service that summarizes two or more samples.

Classes
SampleMergingService

Define a sample merging service that summarizes one or more samples.

Source code in src/taxpasta/domain/service/sample_merging_service.py
class SampleMergingService:
    """Define a sample merging service that summarizes one or more samples."""

    @classmethod
    @pa.check_types(lazy=True)
    def merge_wide(cls, samples: Iterable[Sample]) -> DataFrame[WideObservationTable]:
        """
        Combine the profiles of two or more samples into a wide observation matrix.

        Args:
            samples: Two or more samples whose profiles should be combined.

        Returns:
            A table with one row per taxon. It contains a taxonomy identifier
            column followed by one abundance count column per sample, named after
            that sample.

        """
        # Index each profile by taxonomy identifier and label its count column with
        # the sample name. `set_index` hands back a copy, so the callers' profiles
        # stay untouched at the price of holding the data twice.
        indexed = []
        for sample in samples:
            by_taxon = sample.profile.set_index(
                keys=StandardProfile.taxonomy_id, verify_integrity=True
            )
            indexed.append(
                by_taxon.rename(columns={StandardProfile.count: sample.name})
            )
        head, rest = indexed[0], indexed[1:]
        # An outer join keeps every taxon seen in any sample.
        merged = head.join(rest, how="outer")
        # Taxa missing from a sample count as zero. The explicit int64 cast works
        # around a Windows type problem, see
        # https://github.com/unionai-oss/pandera/issues/726
        matrix = merged.fillna(0).astype(np.int64)
        # `set_index` restored the underlying dtype of the categorical
        # `taxonomy_id` column, so after `reset_index` it has dtype object. Schema
        # coercion turns it back into a categorical when pandera checks the
        # return type.
        return matrix.reset_index()

    @classmethod
    @pa.check_types(lazy=True)
    def merge_long(cls, samples: Iterable[Sample]) -> DataFrame[TidyObservationTable]:
        """
        Combine the profiles of two or more samples into a tidy observation table.

        Args:
            samples: Two or more samples whose profiles should be combined.

        Returns:
            A table with three columns: the taxonomy identifier, the abundance
            count, and the identifier of the sample.

        """
        # `assign` hands back a copy with the extra `sample` column, so the
        # callers' profiles stay untouched at the price of holding the data twice.
        labelled = []
        for sample in samples:
            labelled.append(sample.profile.assign(sample=sample.name))
        # `concat` restores the underlying dtype of the categorical `taxonomy_id`
        # column, leaving it with dtype object. Schema coercion turns it, and the
        # `sample` column likewise, back into a categorical when pandera checks
        # the return type.
        stacked = pd.concat(labelled, ignore_index=True, copy=False)
        # The explicit int64 cast works around a Windows type problem, see
        # https://github.com/unionai-oss/pandera/issues/726
        count_column = TidyObservationTable.count
        stacked[count_column] = stacked[count_column].astype(np.int64)
        return stacked
Functions
merge_long(samples: Iterable[Sample]) -> DataFrame[TidyObservationTable] classmethod

Merge two or more sample profiles into a tidy observation table.

Parameters:

Name Type Description Default
samples Iterable[Sample]

Two or more samples.

required

Returns:

Type Description
DataFrame[TidyObservationTable]

A single table containing three columns: taxonomy identifier, abundance count, and sample identifier.

Source code in src/taxpasta/domain/service/sample_merging_service.py
@classmethod
@pa.check_types(lazy=True)
def merge_long(cls, samples: Iterable[Sample]) -> DataFrame[TidyObservationTable]:
    """
    Combine the profiles of two or more samples into a tidy observation table.

    Args:
        samples: Two or more samples whose profiles should be combined.

    Returns:
        A table with three columns: the taxonomy identifier, the abundance
        count, and the identifier of the sample.

    """
    # `assign` hands back a copy with the extra `sample` column, so the callers'
    # profiles stay untouched at the price of holding the data twice.
    labelled = []
    for sample in samples:
        labelled.append(sample.profile.assign(sample=sample.name))
    # `concat` restores the underlying dtype of the categorical `taxonomy_id`
    # column, leaving it with dtype object. Schema coercion turns it, and the
    # `sample` column likewise, back into a categorical when pandera checks the
    # return type.
    stacked = pd.concat(labelled, ignore_index=True, copy=False)
    # The explicit int64 cast works around a Windows type problem, see
    # https://github.com/unionai-oss/pandera/issues/726
    count_column = TidyObservationTable.count
    stacked[count_column] = stacked[count_column].astype(np.int64)
    return stacked
merge_wide(samples: Iterable[Sample]) -> DataFrame[WideObservationTable] classmethod

Merge two or more sample profiles into a wide-format observation matrix.

Parameters:

Name Type Description Default
samples Iterable[Sample]

Two or more samples.

required

Returns:

Type Description
DataFrame[WideObservationTable]

A single table containing one row per taxon, one column for the taxonomy identifier, and one column per sample with abundance counts.

Source code in src/taxpasta/domain/service/sample_merging_service.py
@classmethod
@pa.check_types(lazy=True)
def merge_wide(cls, samples: Iterable[Sample]) -> DataFrame[WideObservationTable]:
    """
    Combine the profiles of two or more samples into a wide observation matrix.

    Args:
        samples: Two or more samples whose profiles should be combined.

    Returns:
        A table with one row per taxon. It contains a taxonomy identifier column
        followed by one abundance count column per sample, named after that
        sample.

    """
    # Index each profile by taxonomy identifier and label its count column with
    # the sample name. `set_index` hands back a copy, so the callers' profiles
    # stay untouched at the price of holding the data twice.
    indexed = []
    for sample in samples:
        by_taxon = sample.profile.set_index(
            keys=StandardProfile.taxonomy_id, verify_integrity=True
        )
        indexed.append(by_taxon.rename(columns={StandardProfile.count: sample.name}))
    head, rest = indexed[0], indexed[1:]
    # An outer join keeps every taxon seen in any sample.
    merged = head.join(rest, how="outer")
    # Taxa missing from a sample count as zero. The explicit int64 cast works
    # around a Windows type problem, see
    # https://github.com/unionai-oss/pandera/issues/726
    matrix = merged.fillna(0).astype(np.int64)
    # `set_index` restored the underlying dtype of the categorical `taxonomy_id`
    # column, so after `reset_index` it has dtype object. Schema coercion turns
    # it back into a categorical when pandera checks the return type.
    return matrix.reset_index()
taxonomy_service

Provide an abstract taxonomy service interface.

Attributes
ResultTable = TypeVar('ResultTable', TidyObservationTable, WideObservationTable, StandardProfile) module-attribute
Classes
TaxonomyService

Bases: ABC

Define the abstract taxonomy service interface.

Source code in src/taxpasta/domain/service/taxonomy_service.py
class TaxonomyService(ABC):
    """
    Define the abstract taxonomy service interface.

    The interface groups per-taxon look-ups (`get_taxon_*`), table annotation
    methods (`add_*`), and the table conversions `format_biom_taxonomy` and
    `summarise_at`.

    """

    def __init__(self, **kwargs) -> None:
        """
        Initialize a taxonomy service instance.

        Args:
            **kwargs: Keyword arguments that are forwarded unchanged to the next
                initializer in the method resolution order.

        """
        super().__init__(**kwargs)

    @abstractmethod
    def get_taxon_name(self, taxonomy_id: int) -> Optional[str]:
        """
        Return the name of a given taxonomy identifier.

        Args:
            taxonomy_id: The taxonomy identifier to look up.

        Returns:
            The taxon name, or None; presumably None means that the identifier
            could not be resolved (confirm against the implementations).

        """

    @abstractmethod
    def get_taxon_rank(self, taxonomy_id: int) -> Optional[str]:
        """
        Return the rank of a given taxonomy identifier.

        Args:
            taxonomy_id: The taxonomy identifier to look up.

        Returns:
            The taxon rank, or None; presumably None means that the identifier
            could not be resolved (confirm against the implementations).

        """

    @abstractmethod
    def get_taxon_name_lineage(self, taxonomy_id: int) -> Optional[List[str]]:
        """
        Return the lineage of a given taxonomy identifier as names.

        Args:
            taxonomy_id: The taxonomy identifier to look up.

        Returns:
            The lineage as a list of names, or None; presumably None means that
            the identifier could not be resolved (confirm against the
            implementations).

        """

    @abstractmethod
    def get_taxon_identifier_lineage(self, taxonomy_id: int) -> Optional[List[int]]:
        """
        Return the lineage of a given taxonomy identifier as identifiers.

        Args:
            taxonomy_id: The taxonomy identifier to look up.

        Returns:
            The lineage as a list of taxonomy identifiers, or None; presumably
            None means that the identifier could not be resolved (confirm against
            the implementations).

        """

    @abstractmethod
    def get_taxon_rank_lineage(self, taxonomy_id: int) -> Optional[List[str]]:
        """
        Return the lineage of a given taxonomy identifier as ranks.

        Args:
            taxonomy_id: The taxonomy identifier to look up.

        Returns:
            The lineage as a list of ranks, or None; presumably None means that
            the identifier could not be resolved (confirm against the
            implementations).

        """

    @abstractmethod
    def add_name(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
        """
        Add a column for the taxon name to the given table.

        Args:
            table: A table following one of the `ResultTable` schemas.

        Returns:
            A table of the same schema type as the input.

        """

    @abstractmethod
    def add_rank(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
        """
        Add a column for the taxon rank to the given table.

        Args:
            table: A table following one of the `ResultTable` schemas.

        Returns:
            A table of the same schema type as the input.

        """

    @abstractmethod
    def add_name_lineage(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
        """
        Add a column for the taxon lineage to the given table.

        Args:
            table: A table following one of the `ResultTable` schemas.

        Returns:
            A table of the same schema type as the input.

        """

    @abstractmethod
    def add_identifier_lineage(
        self, table: DataFrame[ResultTable]
    ) -> DataFrame[ResultTable]:
        """
        Add a column for the taxon lineage as identifiers to the given table.

        Args:
            table: A table following one of the `ResultTable` schemas.

        Returns:
            A table of the same schema type as the input.

        """

    @abstractmethod
    def add_rank_lineage(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
        """
        Add a column for the taxon lineage as ranks to the given table.

        Args:
            table: A table following one of the `ResultTable` schemas.

        Returns:
            A table of the same schema type as the input.

        """

    @abstractmethod
    def format_biom_taxonomy(
        self, table: DataFrame[ResultTable]
    ) -> List[Dict[str, List[str]]]:
        """
        Format the taxonomy as BIOM observation metadata.

        Args:
            table: A table following one of the `ResultTable` schemas.

        Returns:
            A list of dictionaries, each mapping a string key to a list of
            strings; presumably one dictionary per row of the table (confirm
            against the implementations).

        """

    @abstractmethod
    def summarise_at(
        self, profile: DataFrame[StandardProfile], rank: str
    ) -> DataFrame[StandardProfile]:
        """
        Summarise a standardised abundance profile at a higher taxonomic rank.

        Args:
            profile: The abundance profile in the standard format.
            rank: The taxonomic rank at which to summarise the profile.

        Returns:
            A profile in the standard format, summarised at the given rank.

        """
Functions
__init__(**kwargs) -> None

Initialize a taxonomy service instance.

Source code in src/taxpasta/domain/service/taxonomy_service.py
def __init__(self, **kwargs) -> None:
    """
    Initialize a taxonomy service instance.

    Args:
        **kwargs: Keyword arguments that are forwarded unchanged to the next
            initializer in the method resolution order.

    """
    super().__init__(**kwargs)
add_identifier_lineage(table: DataFrame[ResultTable]) -> DataFrame[ResultTable] abstractmethod

Add a column for the taxon lineage as identifiers to the given table.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def add_identifier_lineage(
    self, table: DataFrame[ResultTable]
) -> DataFrame[ResultTable]:
    """
    Add a column for the taxon lineage as identifiers to the given table.

    Args:
        table: A table following one of the `ResultTable` schemas.

    Returns:
        A table of the same schema type as the input.

    """
add_name(table: DataFrame[ResultTable]) -> DataFrame[ResultTable] abstractmethod

Add a column for the taxon name to the given table.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def add_name(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
    """
    Add a column for the taxon name to the given table.

    Args:
        table: A table following one of the `ResultTable` schemas.

    Returns:
        A table of the same schema type as the input.

    """
add_name_lineage(table: DataFrame[ResultTable]) -> DataFrame[ResultTable] abstractmethod

Add a column for the taxon lineage to the given table.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def add_name_lineage(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
    """
    Add a column for the taxon lineage to the given table.

    Args:
        table: A table following one of the `ResultTable` schemas.

    Returns:
        A table of the same schema type as the input.

    """
add_rank(table: DataFrame[ResultTable]) -> DataFrame[ResultTable] abstractmethod

Add a column for the taxon rank to the given table.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def add_rank(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
    """
    Add a column for the taxon rank to the given table.

    Args:
        table: A table following one of the `ResultTable` schemas.

    Returns:
        A table of the same schema type as the input.

    """
add_rank_lineage(table: DataFrame[ResultTable]) -> DataFrame[ResultTable] abstractmethod

Add a column for the taxon lineage as ranks to the given table.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def add_rank_lineage(self, table: DataFrame[ResultTable]) -> DataFrame[ResultTable]:
    """
    Add a column for the taxon lineage as ranks to the given table.

    Args:
        table: A table following one of the `ResultTable` schemas.

    Returns:
        A table of the same schema type as the input.

    """
format_biom_taxonomy(table: DataFrame[ResultTable]) -> List[Dict[str, List[str]]] abstractmethod

Format the taxonomy as BIOM observation metadata.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def format_biom_taxonomy(
    self, table: DataFrame[ResultTable]
) -> List[Dict[str, List[str]]]:
    """
    Format the taxonomy as BIOM observation metadata.

    Args:
        table: A table following one of the `ResultTable` schemas.

    Returns:
        A list of dictionaries, each mapping a string key to a list of strings;
        presumably one dictionary per row of the table (confirm against the
        implementations).

    """
get_taxon_identifier_lineage(taxonomy_id: int) -> Optional[List[int]] abstractmethod

Return the lineage of a given taxonomy identifier as identifiers.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def get_taxon_identifier_lineage(self, taxonomy_id: int) -> Optional[List[int]]:
    """
    Return the lineage of a given taxonomy identifier as identifiers.

    Args:
        taxonomy_id: The taxonomy identifier to look up.

    Returns:
        The lineage as a list of taxonomy identifiers, or None; presumably None
        means that the identifier could not be resolved (confirm against the
        implementations).

    """
get_taxon_name(taxonomy_id: int) -> Optional[str] abstractmethod

Return the name of a given taxonomy identifier.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def get_taxon_name(self, taxonomy_id: int) -> Optional[str]:
    """
    Return the name of a given taxonomy identifier.

    Args:
        taxonomy_id: The taxonomy identifier to look up.

    Returns:
        The taxon name, or None; presumably None means that the identifier could
        not be resolved (confirm against the implementations).

    """
get_taxon_name_lineage(taxonomy_id: int) -> Optional[List[str]] abstractmethod

Return the lineage of a given taxonomy identifier as names.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def get_taxon_name_lineage(self, taxonomy_id: int) -> Optional[List[str]]:
    """
    Return the lineage of a given taxonomy identifier as names.

    Args:
        taxonomy_id: The taxonomy identifier to look up.

    Returns:
        The lineage as a list of names, or None; presumably None means that the
        identifier could not be resolved (confirm against the implementations).

    """
get_taxon_rank(taxonomy_id: int) -> Optional[str] abstractmethod

Return the rank of a given taxonomy identifier.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def get_taxon_rank(self, taxonomy_id: int) -> Optional[str]:
    """
    Return the rank of a given taxonomy identifier.

    Args:
        taxonomy_id: The taxonomy identifier to look up.

    Returns:
        The taxon rank, or None; presumably None means that the identifier could
        not be resolved (confirm against the implementations).

    """
get_taxon_rank_lineage(taxonomy_id: int) -> Optional[List[str]] abstractmethod

Return the lineage of a given taxonomy identifier as ranks.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def get_taxon_rank_lineage(self, taxonomy_id: int) -> Optional[List[str]]:
    """
    Return the lineage of a given taxonomy identifier as ranks.

    Args:
        taxonomy_id: The taxonomy identifier to look up.

    Returns:
        The lineage as a list of ranks, or None; presumably None means that the
        identifier could not be resolved (confirm against the implementations).

    """
summarise_at(profile: DataFrame[StandardProfile], rank: str) -> DataFrame[StandardProfile] abstractmethod

Summarise a standardised abundance profile at a higher taxonomic rank.

Source code in src/taxpasta/domain/service/taxonomy_service.py
@abstractmethod
def summarise_at(
    self, profile: DataFrame[StandardProfile], rank: str
) -> DataFrame[StandardProfile]:
    """
    Summarise a standardised abundance profile at a higher taxonomic rank.

    Args:
        profile: The abundance profile in the standard format.
        rank: The taxonomic rank at which to summarise the profile.

    Returns:
        A profile in the standard format, summarised at the given rank.

    """