Infrastructure

Define a standardisation service for Bracken profiles.

Source code in taxpasta/infrastructure/application/bracken/bracken_profile_standardisation_service.py

class BrackenProfileStandardisationService(ProfileStandardisationService):
    """Convert Bracken profiles into the standard profile layout."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(
        cls, profile: DataFrame[BrackenProfile]
    ) -> DataFrame[StandardProfile]:
        """
        Reduce a Bracken profile to standard taxonomy identifiers and counts.

        Only the taxonomy identifier and the newly estimated reads columns are
        kept; both are renamed to their `StandardProfile` counterparts.

        Args:
            profile: A taxonomic profile generated by Bracken.

        Returns:
            A standardized profile.

        Raises:
            pandera.errors.SchemaErrors: If the given profile does not conform with the
                `BrackenProfile` or the transformed output does not conform with the
                `StandardProfile`.  # noqa: DAR402

        """
        column_map = {
            BrackenProfile.taxonomy_id: StandardProfile.taxonomy_id,
            BrackenProfile.new_est_reads: StandardProfile.count,
        }
        # The keys of the mapping double as the selection of columns to keep.
        selected = profile[list(column_map)].copy()
        return selected.rename(columns=column_map)

Functions¶

transform(profile: DataFrame[BrackenProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given Bracken profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[BrackenProfile]`	A taxonomic profile generated by Bracken.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Raises:

Type	Description
`pandera.errors.SchemaErrors`	If the given profile does not conform with the `BrackenProfile` or the transformed output does not conform with the `StandardProfile`.

Source code in taxpasta/infrastructure/application/bracken/bracken_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(
    cls, profile: DataFrame[BrackenProfile]
) -> DataFrame[StandardProfile]:
    """
    Reduce a Bracken profile to standard taxonomy identifiers and counts.

    Only the taxonomy identifier and the newly estimated reads columns are
    kept; both are renamed to their `StandardProfile` counterparts.

    Args:
        profile: A taxonomic profile generated by Bracken.

    Returns:
        A standardized profile.

    Raises:
        pandera.errors.SchemaErrors: If the given profile does not conform with the
            `BrackenProfile` or the transformed output does not conform with the
            `StandardProfile`.  # noqa: DAR402

    """
    column_map = {
        BrackenProfile.taxonomy_id: StandardProfile.taxonomy_id,
        BrackenProfile.new_est_reads: StandardProfile.count,
    }
    # The keys of the mapping double as the selection of columns to keep.
    selected = profile[list(column_map)].copy()
    return selected.rename(columns=column_map)

`centrifuge` ¶

Classes¶

Modules¶

centrifuge_profile ¶

Provide a description of the centrifuge profile format.

Classes¶

CentrifugeProfile ¶

Bases: pa.SchemaModel

Define the expected centrifuge profile format.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile.py

class CentrifugeProfile(pa.SchemaModel):
    """
    Define the expected centrifuge profile format.

    The columns are declared in the order in which they are expected, because
    the schema is configured as ``ordered``.

    """

    # Percentages are bounded to the closed interval [0, 100].
    percent: Series[float] = pa.Field(ge=0.0, le=100.0)
    # Read counts must be non-negative.
    clade_assigned_reads: Series[int] = pa.Field(ge=0)
    direct_assigned_reads: Series[int] = pa.Field(ge=0)
    taxonomy_level: Series[pd.CategoricalDtype] = pa.Field()
    taxonomy_id: Series[int] = pa.Field(ge=0)
    name: Series[str] = pa.Field()

    @pa.check("percent", name="compositionality")
    @classmethod
    def check_compositionality(cls, percent: Series[float]) -> bool:
        """
        Check that the percent of 'unclassified' and 'root' add up to a hundred.

        Args:
            percent: The percent column of the profile.

        Returns:
            True if the column is empty or if its first two entries sum to one
            hundred within an absolute tolerance of 0.01; False otherwise.

        """
        if len(percent) == 0:
            return True
        # Select the first two rows strictly by position; these are assumed to
        # be the 'unclassified' and 'root' entries. Using `.iloc` avoids any
        # dependence on how `[]` interprets a slice for the given index type.
        return bool(np.isclose(percent.iloc[:2].sum(), 100.0, atol=0.01))

    class Config:
        """Configure the schema model."""

        coerce = True
        ordered = True
        strict = True

Attributes¶

clade_assigned_reads: Series[int] = pa.Field(ge=0) class-attribute ¶

direct_assigned_reads: Series[int] = pa.Field(ge=0) class-attribute ¶

name: Series[str] = pa.Field() class-attribute ¶

percent: Series[float] = pa.Field(ge=0.0, le=100.0) class-attribute ¶

taxonomy_id: Series[int] = pa.Field(ge=0) class-attribute ¶

taxonomy_level: Series[pd.CategoricalDtype] = pa.Field() class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile.py

class Config:
    """
    Configure the schema model.

    These options are interpreted by pandera when a data frame is validated
    against the schema that this configuration belongs to.

    """

    # Cast columns to their declared types as part of validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
    # Reject data frames that contain columns not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

Functions¶

check_compositionality(percent: Series[float]) -> bool classmethod ¶

Check that the percent of 'unclassified' and 'root' add up to a hundred.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile.py

@pa.check("percent", name="compositionality")
@classmethod
def check_compositionality(cls, percent: Series[float]) -> bool:
    """
    Check that the percent of 'unclassified' and 'root' add up to a hundred.

    Args:
        percent: The percent column of the profile.

    Returns:
        True if the column is empty or if its first two entries sum to one
        hundred within an absolute tolerance of 0.01; False otherwise.

    """
    if len(percent) == 0:
        return True
    # Select the first two rows strictly by position; these are assumed to be
    # the 'unclassified' and 'root' entries. Using `.iloc` avoids any
    # dependence on how `[]` interprets a slice for the given index type.
    return bool(np.isclose(percent.iloc[:2].sum(), 100.0, atol=0.01))

centrifuge_profile_reader ¶

Provide a reader for Centrifuge profiles.

Attributes¶ Classes¶

CentrifugeProfileReader ¶

Bases: ProfileReader

Define a reader for centrifuge profiles.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile_reader.py

class CentrifugeProfileReader(ProfileReader):
    """Load centrifuge taxonomic profiles into data frames."""

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[CentrifugeProfile]:
        """
        Parse a centrifuge taxonomic profile from the given source.

        The source is read as a header-less, tab-separated table whose columns
        are labelled with the `CentrifugeProfile` column names.

        Args:
            profile: A source that contains a tab-separated taxonomic profile generated
                by centrifuge.

        Returns:
            A data frame representation of the centrifuge profile.

        """
        column_names = [
            CentrifugeProfile.percent,
            CentrifugeProfile.clade_assigned_reads,
            CentrifugeProfile.direct_assigned_reads,
            CentrifugeProfile.taxonomy_level,
            CentrifugeProfile.taxonomy_id,
            CentrifugeProfile.name,
        ]
        table = pd.read_table(
            filepath_or_buffer=profile,
            sep="\t",
            header=None,
            index_col=False,
            skipinitialspace=True,
            names=column_names,
        )
        # Delegate the column count verification to the shared reader helper.
        cls._check_num_columns(table, CentrifugeProfile)
        return table

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[CentrifugeProfile] classmethod ¶

Read a centrifuge taxonomic profile from the given source.

Parameters:

Name	Type	Description	Default
`profile`	`BufferOrFilepath`	A source that contains a tab-separated taxonomic profile generated by centrifuge.	required

Returns:

Type	Description
`DataFrame[CentrifugeProfile]`	A data frame representation of the centrifuge profile.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[CentrifugeProfile]:
    """
    Parse a centrifuge taxonomic profile from the given source.

    The source is read as a header-less, tab-separated table whose columns
    are labelled with the `CentrifugeProfile` column names.

    Args:
        profile: A source that contains a tab-separated taxonomic profile generated
            by centrifuge.

    Returns:
        A data frame representation of the centrifuge profile.

    """
    column_names = [
        CentrifugeProfile.percent,
        CentrifugeProfile.clade_assigned_reads,
        CentrifugeProfile.direct_assigned_reads,
        CentrifugeProfile.taxonomy_level,
        CentrifugeProfile.taxonomy_id,
        CentrifugeProfile.name,
    ]
    table = pd.read_table(
        filepath_or_buffer=profile,
        sep="\t",
        header=None,
        index_col=False,
        skipinitialspace=True,
        names=column_names,
    )
    # Delegate the column count verification to the shared reader helper.
    cls._check_num_columns(table, CentrifugeProfile)
    return table

centrifuge_profile_standardisation_service ¶

Provide a standardisation service for centrifuge profiles.

Attributes¶

logger = logging.getLogger(__name__) module-attribute ¶

Classes¶

CentrifugeProfileStandardisationService ¶

Define a standardisation service for centrifuge profiles.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile_standardisation_service.py

class CentrifugeProfileStandardisationService(ProfileStandardisationService):
    """Convert centrifuge profiles into the standard profile layout."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(
        cls, profile: DataFrame[CentrifugeProfile]
    ) -> DataFrame[StandardProfile]:
        """
        Reduce a centrifuge profile to standard taxonomy identifiers and counts.

        Only the taxonomy identifier and the directly assigned reads columns are
        kept; both are renamed to their `StandardProfile` counterparts.

        Args:
            profile: A taxonomic profile generated by centrifuge.

        Returns:
            A standardized profile.

        Raises:
            pandera.errors.SchemaErrors: If the given profile does not conform with the
                `CentrifugeProfile` or the transformed output does not conform with
                the `StandardProfile`.  # noqa: DAR402

        """
        column_map = {
            CentrifugeProfile.taxonomy_id: StandardProfile.taxonomy_id,
            CentrifugeProfile.direct_assigned_reads: StandardProfile.count,
        }
        # The keys of the mapping double as the selection of columns to keep.
        selected = profile[list(column_map)].copy()
        return selected.rename(columns=column_map)

Functions¶

transform(profile: DataFrame[CentrifugeProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given centrifuge profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[CentrifugeProfile]`	A taxonomic profile generated by centrifuge.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/centrifuge/centrifuge_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(
    cls, profile: DataFrame[CentrifugeProfile]
) -> DataFrame[StandardProfile]:
    """
    Reduce a centrifuge profile to standard taxonomy identifiers and counts.

    Only the taxonomy identifier and the directly assigned reads columns are
    kept; both are renamed to their `StandardProfile` counterparts.

    Args:
        profile: A taxonomic profile generated by centrifuge.

    Returns:
        A standardized profile.

    Raises:
        pandera.errors.SchemaErrors: If the given profile does not conform with the
            `CentrifugeProfile` or the transformed output does not conform with
            the `StandardProfile`.  # noqa: DAR402

    """
    column_map = {
        CentrifugeProfile.taxonomy_id: StandardProfile.taxonomy_id,
        CentrifugeProfile.direct_assigned_reads: StandardProfile.count,
    }
    # The keys of the mapping double as the selection of columns to keep.
    selected = profile[list(column_map)].copy()
    return selected.rename(columns=column_map)

`diamond` ¶

Classes¶

Modules¶

diamond_profile ¶

Provide a description of the diamond profile format.

Classes¶

DiamondProfile ¶

Bases: pa.SchemaModel

Define the expected diamond profile format.

Source code in taxpasta/infrastructure/application/diamond/diamond_profile.py

class DiamondProfile(pa.SchemaModel):
    """
    Define the expected diamond profile format.

    The three columns are declared in the order in which they are expected,
    because the schema is configured as ``ordered``.

    """

    # Identifier of the query; presumably one row per read - TODO confirm.
    query_id: Series[str] = pa.Field()
    # Taxonomy identifiers must be non-negative.
    taxonomy_id: Series[int] = pa.Field(ge=0)
    # E-values are bounded to the closed interval [0, 1].
    e_value: Series[float] = pa.Field(ge=0.0, le=1.0)

    class Config:
        """Configure the schema model."""

        # Cast columns to their declared types as part of validation.
        coerce = True
        # Require the columns to appear in the order in which they are declared.
        ordered = True
        # Reject data frames that contain columns not declared in the schema.
        strict = True

Attributes¶

e_value: Series[float] = pa.Field(ge=0.0, le=1.0) class-attribute ¶

query_id: Series[str] = pa.Field() class-attribute ¶

taxonomy_id: Series[int] = pa.Field(ge=0) class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/diamond/diamond_profile.py

class Config:
    """
    Configure the schema model.

    These options are interpreted by pandera when a data frame is validated
    against the schema that this configuration belongs to.

    """

    # Cast columns to their declared types as part of validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
    # Reject data frames that contain columns not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

diamond_profile_reader ¶

Provide a reader for diamond profiles.

Attributes¶ Classes¶

DiamondProfileReader ¶

Bases: ProfileReader

Define a reader for Diamond profiles.

Source code in taxpasta/infrastructure/application/diamond/diamond_profile_reader.py

class DiamondProfileReader(ProfileReader):
    """Load diamond taxonomic profiles into data frames."""

    # Evaluates to 10,000,000; not referenced by `read`.
    LARGE_INTEGER = int(10e6)

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[DiamondProfile]:
        """
        Parse a diamond taxonomic profile from the given source.

        The source is read as a header-less, tab-separated table whose columns
        are labelled with the `DiamondProfile` column names.

        Args:
            profile: A source that contains a tab-separated taxonomic profile generated
                by diamond.

        Returns:
            A data frame representation of the diamond profile.

        """
        column_names = [
            DiamondProfile.query_id,
            DiamondProfile.taxonomy_id,
            DiamondProfile.e_value,
        ]
        table = pd.read_table(
            filepath_or_buffer=profile,
            sep="\t",
            header=None,
            index_col=False,
            names=column_names,
        )
        # Delegate the column count verification to the shared reader helper.
        cls._check_num_columns(table, DiamondProfile)
        return table

Attributes¶

LARGE_INTEGER = int(10000000.0) class-attribute ¶

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[DiamondProfile] classmethod ¶

Read a diamond taxonomic profile from a file.

Source code in taxpasta/infrastructure/application/diamond/diamond_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[DiamondProfile]:
    """
    Parse a diamond taxonomic profile from the given source.

    The source is read as a header-less, tab-separated table whose columns
    are labelled with the `DiamondProfile` column names.

    Args:
        profile: A source that contains a tab-separated taxonomic profile generated
            by diamond.

    Returns:
        A data frame representation of the diamond profile.

    """
    column_names = [
        DiamondProfile.query_id,
        DiamondProfile.taxonomy_id,
        DiamondProfile.e_value,
    ]
    table = pd.read_table(
        filepath_or_buffer=profile,
        sep="\t",
        header=None,
        index_col=False,
        names=column_names,
    )
    # Delegate the column count verification to the shared reader helper.
    cls._check_num_columns(table, DiamondProfile)
    return table

diamond_profile_standardisation_service ¶

Provide a standardisation service for diamond profiles.

Classes¶

DiamondProfileStandardisationService ¶

Define a standardisation service for diamond profiles.

Source code in taxpasta/infrastructure/application/diamond/diamond_profile_standardisation_service.py

class DiamondProfileStandardisationService(ProfileStandardisationService):
    """Define a standardisation service for diamond profiles."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(
        cls, profile: DataFrame[DiamondProfile]
    ) -> DataFrame[StandardProfile]:
        """
        Tidy up and standardize a given diamond profile.

        The number of rows per taxonomy identifier is used as the read count.
        Identifiers are reported in order of first appearance.

        Args:
            profile: A taxonomic profile generated by diamond.

        Returns:
            A standardized profile.

        Raises:
            pandera.errors.SchemaErrors: If the given profile does not conform with the
                `DiamondProfile` or the transformed output does not conform with the
                `StandardProfile`.  # noqa: DAR402

        """
        # Count occurrences of taxonomy identifiers to yield read counts.
        occurrences = (
            profile[[DiamondProfile.taxonomy_id]]
            .groupby(DiamondProfile.taxonomy_id, sort=False)
            .size()
        )
        # Name the count column explicitly rather than renaming the implicit
        # default label `0` that `reset_index` assigns to an unnamed series.
        return occurrences.reset_index(name=StandardProfile.count).rename(
            columns={DiamondProfile.taxonomy_id: StandardProfile.taxonomy_id}
        )

Functions¶

transform(profile: DataFrame[DiamondProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given diamond profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[DiamondProfile]`	A taxonomic profile generated by diamond.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/diamond/diamond_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(
    cls, profile: DataFrame[DiamondProfile]
) -> DataFrame[StandardProfile]:
    """
    Tidy up and standardize a given diamond profile.

    The number of rows per taxonomy identifier is used as the read count.
    Identifiers are reported in order of first appearance.

    Args:
        profile: A taxonomic profile generated by diamond.

    Returns:
        A standardized profile.

    Raises:
        pandera.errors.SchemaErrors: If the given profile does not conform with the
            `DiamondProfile` or the transformed output does not conform with the
            `StandardProfile`.  # noqa: DAR402

    """
    # Count occurrences of taxonomy identifiers to yield read counts.
    occurrences = (
        profile[[DiamondProfile.taxonomy_id]]
        .groupby(DiamondProfile.taxonomy_id, sort=False)
        .size()
    )
    # Name the count column explicitly rather than renaming the implicit
    # default label `0` that `reset_index` assigns to an unnamed series.
    return occurrences.reset_index(name=StandardProfile.count).rename(
        columns={DiamondProfile.taxonomy_id: StandardProfile.taxonomy_id}
    )

`kaiju` ¶

Classes¶

Modules¶

kaiju_profile ¶

Provide a description of the kaiju profile format.

Classes¶

KaijuProfile ¶

Bases: pa.SchemaModel

Define the expected kaiju profile format.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile.py

class KaijuProfile(pa.SchemaModel):
    """
    Describe the layout that a kaiju profile is expected to have.

    The columns are declared in the order in which they are expected, because
    the schema is configured as ``ordered``.

    """

    file: Series[str] = pa.Field()
    percent: Series[float] = pa.Field(ge=0.0, le=100.0)
    reads: Series[int] = pa.Field(ge=0)
    # Pandas cannot handle missing integer values. Thus, we read this column as string.
    taxon_id: Series[str] = pa.Field(nullable=True)
    taxon_name: Series[str] = pa.Field()

    @pa.check("percent", name="compositionality")
    @classmethod
    def check_compositionality(cls, percent: Series[float]) -> bool:
        """Verify that all percentages together amount to a hundred."""
        if len(percent) == 0:
            # An empty column is accepted as is.
            return True
        # Kaiju reports percentages with six decimals.
        total = percent.sum()
        return bool(np.isclose(total, 100.0, atol=1e-06))

    @pa.check("file", name="unique_filename")
    @classmethod
    def check_unique_filename(cls, file_col: Series[str]) -> bool:
        """Verify that every row refers to one and the same Kaiju filename."""
        if len(file_col) == 0:
            # An empty column is accepted as is.
            return True
        return file_col.nunique() == 1

    class Config:
        """Configure the schema model."""

        coerce = True
        ordered = True
        strict = True

Attributes¶

file: Series[str] = pa.Field() class-attribute ¶

percent: Series[float] = pa.Field(ge=0.0, le=100.0) class-attribute ¶

reads: Series[int] = pa.Field(ge=0) class-attribute ¶

taxon_id: Series[str] = pa.Field(nullable=True) class-attribute ¶

taxon_name: Series[str] = pa.Field() class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile.py

class Config:
    """
    Configure the schema model.

    These options are interpreted by pandera when a data frame is validated
    against the schema that this configuration belongs to.

    """

    # Cast columns to their declared types as part of validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
    # Reject data frames that contain columns not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

Functions¶

check_compositionality(percent: Series[float]) -> bool classmethod ¶

Check that the percentages add up to a hundred.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile.py

@pa.check("percent", name="compositionality")
@classmethod
def check_compositionality(cls, percent: Series[float]) -> bool:
    """Verify that all percentages together amount to a hundred."""
    if len(percent) == 0:
        # An empty column is accepted as is.
        return True
    # Kaiju reports percentages with six decimals.
    total = percent.sum()
    return bool(np.isclose(total, 100.0, atol=1e-06))

check_unique_filename(file_col: Series[str]) -> bool classmethod ¶

Check that Kaiju filename is unique.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile.py

@pa.check("file", name="unique_filename")
@classmethod
def check_unique_filename(cls, file_col: Series[str]) -> bool:
    """Verify that every row refers to one and the same Kaiju filename."""
    if len(file_col) == 0:
        # An empty column is accepted as is.
        return True
    return file_col.nunique() == 1

kaiju_profile_reader ¶

Provide a reader for kaiju profiles.

Attributes¶ Classes¶

KaijuProfileReader ¶

Bases: ProfileReader

Define a reader for kaiju profiles.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile_reader.py

class KaijuProfileReader(ProfileReader):
    """Load kaiju taxonomic profiles into data frames."""

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[KaijuProfile]:
        """
        Parse a kaiju taxonomic profile from the given source.

        The source is read as a tab-separated table whose first line provides
        the column names.

        Args:
            profile: A source that contains a tab-separated taxonomic profile generated
                by kaiju.

        Returns:
            A data frame representation of the kaiju profile.

        """
        # Taxon identifiers are parsed as strings rather than numbers.
        column_types = {KaijuProfile.taxon_id: str}
        table = pd.read_table(
            filepath_or_buffer=profile,
            sep="\t",
            header=0,
            index_col=False,
            dtype=column_types,
        )
        # Delegate the column count verification to the shared reader helper.
        cls._check_num_columns(table, KaijuProfile)
        return table

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[KaijuProfile] classmethod ¶

Read a kaiju taxonomic profile from the given source.

Parameters:

Name	Type	Description	Default
`profile`	`BufferOrFilepath`	A source that contains a tab-separated taxonomic profile generated by kaiju.	required

Returns:

Type	Description
`DataFrame[KaijuProfile]`	A data frame representation of the kaiju profile.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[KaijuProfile]:
    """
    Parse a kaiju taxonomic profile from the given source.

    The source is read as a tab-separated table whose first line provides
    the column names.

    Args:
        profile: A source that contains a tab-separated taxonomic profile generated
            by kaiju.

    Returns:
        A data frame representation of the kaiju profile.

    """
    # Taxon identifiers are parsed as strings rather than numbers.
    column_types = {KaijuProfile.taxon_id: str}
    table = pd.read_table(
        filepath_or_buffer=profile,
        sep="\t",
        header=0,
        index_col=False,
        dtype=column_types,
    )
    # Delegate the column count verification to the shared reader helper.
    cls._check_num_columns(table, KaijuProfile)
    return table

kaiju_profile_standardisation_service ¶

Provide a standardisation service for kaiju profiles.

Classes¶

KaijuProfileStandardisationService ¶

Define a standardisation service for kaiju profiles.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile_standardisation_service.py

class KaijuProfileStandardisationService(ProfileStandardisationService):
    """Convert kaiju profiles into the standard profile layout."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(cls, profile: DataFrame[KaijuProfile]) -> DataFrame[StandardProfile]:
        """
        Reduce a kaiju profile to standard taxonomy identifiers and counts.

        Rows with a taxon identifier are kept and their identifier is converted
        to an integer. The reads of all rows without a taxon identifier are
        summed into one row with taxonomy identifier zero, which is always
        appended after the other rows.

        Args:
            profile: A taxonomic profile generated by kaiju.

        Returns:
            A standardized profile.

        Raises:
            pandera.errors.SchemaErrors: If the given profile does not conform with the
                `KaijuProfile` or the transformed output does not conform with the
                `StandardProfile`.  # noqa: DAR402

        """
        id_column = StandardProfile.taxonomy_id
        count_column = StandardProfile.count

        counts = (
            profile[[KaijuProfile.taxon_id, KaijuProfile.reads]]
            .copy()
            .rename(
                columns={
                    KaijuProfile.taxon_id: id_column,
                    KaijuProfile.reads: count_column,
                }
            )
        )
        lacks_identifier = counts[id_column].isnull()

        identified = counts.loc[~lacks_identifier, :].copy()
        identified[id_column] = identified[id_column].astype(int)

        # Rows without an identifier (unclassified reads) collapse into a single
        # row that carries the identifier zero and the sum of their reads.
        unidentified = pd.DataFrame(
            {
                id_column: [0],
                count_column: [counts.loc[lacks_identifier, count_column].sum()],
            }
        )
        return pd.concat([identified, unidentified], ignore_index=True)

Functions¶

transform(profile: DataFrame[KaijuProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given kaiju profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[KaijuProfile]`	A taxonomic profile generated by kaiju.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/kaiju/kaiju_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(cls, profile: DataFrame[KaijuProfile]) -> DataFrame[StandardProfile]:
    """
    Reduce a kaiju profile to standard taxonomy identifiers and counts.

    Rows with a taxon identifier are kept and their identifier is converted
    to an integer. The reads of all rows without a taxon identifier are
    summed into one row with taxonomy identifier zero, which is always
    appended after the other rows.

    Args:
        profile: A taxonomic profile generated by kaiju.

    Returns:
        A standardized profile.

    Raises:
        pandera.errors.SchemaErrors: If the given profile does not conform with the
            `KaijuProfile` or the transformed output does not conform with the
            `StandardProfile`.  # noqa: DAR402

    """
    id_column = StandardProfile.taxonomy_id
    count_column = StandardProfile.count

    counts = (
        profile[[KaijuProfile.taxon_id, KaijuProfile.reads]]
        .copy()
        .rename(
            columns={
                KaijuProfile.taxon_id: id_column,
                KaijuProfile.reads: count_column,
            }
        )
    )
    lacks_identifier = counts[id_column].isnull()

    identified = counts.loc[~lacks_identifier, :].copy()
    identified[id_column] = identified[id_column].astype(int)

    # Rows without an identifier (unclassified reads) collapse into a single
    # row that carries the identifier zero and the sum of their reads.
    unidentified = pd.DataFrame(
        {
            id_column: [0],
            count_column: [counts.loc[lacks_identifier, count_column].sum()],
        }
    )
    return pd.concat([identified, unidentified], ignore_index=True)

`kraken2` ¶

Classes¶

Modules¶

kraken2_profile ¶

Provide a description of the kraken2 profile format.

Classes¶

Kraken2Profile ¶

Bases: pa.SchemaModel

Define the expected kraken2 profile format.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile.py

class Kraken2Profile(pa.SchemaModel):
    """
    Describe the layout that a kraken2 profile is expected to have.

    The columns are declared in the order in which they are expected, because
    the schema is configured as ``ordered``. The two minimizer columns are
    optional.

    """

    percent: Series[float] = pa.Field(ge=0.0, le=100.0)
    clade_assigned_reads: Series[int] = pa.Field(ge=0)
    direct_assigned_reads: Series[int] = pa.Field(ge=0)
    num_minimizers: Optional[Series[int]] = pa.Field(ge=0)
    distinct_minimizers: Optional[Series[int]] = pa.Field(ge=0)
    taxonomy_lvl: Series[pd.CategoricalDtype] = pa.Field()
    taxonomy_id: Series[int] = pa.Field(ge=0)
    name: Series[str] = pa.Field()

    @pa.dataframe_check
    @classmethod
    def check_compositionality(cls, profile: pd.DataFrame) -> bool:
        """Verify that 'unclassified' and 'root' percentages amount to a hundred."""
        if len(profile) == 0:
            # An empty profile is accepted as is.
            return True
        # If 100% of reads are assigned, unclassified reads are not reported at
        # all, hence rows are selected by their level rather than by position.
        is_top_level = profile[cls.taxonomy_lvl].isin(["U", "R"])
        top_level_percent = profile.loc[is_top_level, cls.percent].sum()
        # Kraken2 reports percentages only to the second decimal, so some
        # deviation from a hundred is tolerated.
        return bool(np.isclose(top_level_percent, 100.0, atol=0.01))

    class Config:
        """Configure the schema model."""

        coerce = True
        ordered = True
        strict = True

Attributes¶

clade_assigned_reads: Series[int] = pa.Field(ge=0) class-attribute ¶

direct_assigned_reads: Series[int] = pa.Field(ge=0) class-attribute ¶

distinct_minimizers: Optional[Series[int]] = pa.Field(ge=0) class-attribute ¶

name: Series[str] = pa.Field() class-attribute ¶

num_minimizers: Optional[Series[int]] = pa.Field(ge=0) class-attribute ¶

percent: Series[float] = pa.Field(ge=0.0, le=100.0) class-attribute ¶

taxonomy_id: Series[int] = pa.Field(ge=0) class-attribute ¶

taxonomy_lvl: Series[pd.CategoricalDtype] = pa.Field() class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile.py

class Config:
    """
    Configure the schema model.

    These options are interpreted by pandera when a data frame is validated
    against the schema that this configuration belongs to.

    """

    # Cast columns to their declared types as part of validation.
    coerce = True
    # Require the columns to appear in the order in which they are declared.
    ordered = True
    # Reject data frames that contain columns not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

Functions¶

check_compositionality(profile: pd.DataFrame) -> bool classmethod ¶

Check that the percent of 'unclassified' and 'root' add up to a hundred.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile.py

@pa.dataframe_check
@classmethod
def check_compositionality(cls, profile: pd.DataFrame) -> bool:
    """Verify that 'unclassified' and 'root' percentages amount to a hundred."""
    if len(profile) == 0:
        # An empty profile is accepted as is.
        return True
    # If 100% of reads are assigned, unclassified reads are not reported at
    # all, hence rows are selected by their level rather than by position.
    is_top_level = profile[cls.taxonomy_lvl].isin(["U", "R"])
    top_level_percent = profile.loc[is_top_level, cls.percent].sum()
    # Kraken2 reports percentages only to the second decimal, so some
    # deviation from a hundred is tolerated.
    return bool(np.isclose(top_level_percent, 100.0, atol=0.01))

kraken2_profile_reader ¶

Provide a reader for kraken2 profiles.

Attributes¶ Classes¶

Kraken2ProfileReader ¶

Bases: ProfileReader

Define a reader for kraken2 profiles.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile_reader.py

class Kraken2ProfileReader(ProfileReader):
    """Load kraken2 taxonomic profiles into data frames."""

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[Kraken2Profile]:
        """
        Parse a kraken2 taxonomic profile from the given source.

        The source is read as a header-less, tab-separated table. Tables with
        six columns are labelled with the basic `Kraken2Profile` column names;
        tables with eight columns additionally receive the two minimizer
        columns.

        Args:
            profile: A source that contains a tab-separated taxonomic profile generated
                by kraken2.

        Returns:
            A data frame representation of the kraken2 profile.

        Raises:
            ValueError: In case the table does not contain exactly six or eight columns.

        """
        table = pd.read_table(
            filepath_or_buffer=profile,
            sep="\t",
            header=None,
            index_col=False,
            skipinitialspace=True,
        )
        num_columns = len(table.columns)
        if num_columns not in (6, 8):
            raise ValueError(
                f"Unexpected kraken2 report format. It has {num_columns} "
                "columns but only six or eight are expected."
            )
        # The minimizer columns are only present in the eight column layout.
        if num_columns == 8:
            minimizer_columns = [
                Kraken2Profile.num_minimizers,
                Kraken2Profile.distinct_minimizers,
            ]
        else:
            minimizer_columns = []
        table.columns = [
            Kraken2Profile.percent,
            Kraken2Profile.clade_assigned_reads,
            Kraken2Profile.direct_assigned_reads,
            *minimizer_columns,
            Kraken2Profile.taxonomy_lvl,
            Kraken2Profile.taxonomy_id,
            Kraken2Profile.name,
        ]
        return table

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[Kraken2Profile] classmethod ¶

Read a kraken2 taxonomic profile from the given source.

Parameters:

Name	Type	Description	Default
`profile`	`BufferOrFilepath`	A source that contains a tab-separated taxonomic profile generated by kraken2.	required

Returns:

Type	Description
`DataFrame[Kraken2Profile]`	A data frame representation of the kraken2 profile.

Raises:

Type	Description
`ValueError`	In case the table does not contain exactly six or eight columns.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[Kraken2Profile]:
    """
    Parse a kraken2 taxonomic profile from the given source.

    The source is read as a header-less, tab-separated table. Tables with
    six columns are labelled with the basic `Kraken2Profile` column names;
    tables with eight columns additionally receive the two minimizer
    columns.

    Args:
        profile: A source that contains a tab-separated taxonomic profile generated
            by kraken2.

    Returns:
        A data frame representation of the kraken2 profile.

    Raises:
        ValueError: In case the table does not contain exactly six or eight columns.

    """
    table = pd.read_table(
        filepath_or_buffer=profile,
        sep="\t",
        header=None,
        index_col=False,
        skipinitialspace=True,
    )
    num_columns = len(table.columns)
    if num_columns not in (6, 8):
        raise ValueError(
            f"Unexpected kraken2 report format. It has {num_columns} "
            "columns but only six or eight are expected."
        )
    # The minimizer columns are only present in the eight column layout.
    if num_columns == 8:
        minimizer_columns = [
            Kraken2Profile.num_minimizers,
            Kraken2Profile.distinct_minimizers,
        ]
    else:
        minimizer_columns = []
    table.columns = [
        Kraken2Profile.percent,
        Kraken2Profile.clade_assigned_reads,
        Kraken2Profile.direct_assigned_reads,
        *minimizer_columns,
        Kraken2Profile.taxonomy_lvl,
        Kraken2Profile.taxonomy_id,
        Kraken2Profile.name,
    ]
    return table

kraken2_profile_standardisation_service ¶

Provide a standardisation service for kraken2 profiles.

Classes¶

Kraken2ProfileStandardisationService ¶

Define a standardisation service for kraken2 profiles.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile_standardisation_service.py

class Kraken2ProfileStandardisationService(ProfileStandardisationService):
    """Standardise kraken2 profiles into the common profile layout."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(
        cls, profile: DataFrame[Kraken2Profile]
    ) -> DataFrame[StandardProfile]:
        """
        Reduce a kraken2 profile to taxonomy IDs and directly assigned reads.

        Args:
            profile: A taxonomic profile generated by kraken2.

        Returns:
            A standardized profile.

        """
        # Source column -> standard column; the keys double as the selection.
        column_map = {
            Kraken2Profile.taxonomy_id: StandardProfile.taxonomy_id,
            Kraken2Profile.direct_assigned_reads: StandardProfile.count,
        }
        return profile[list(column_map)].copy().rename(columns=column_map)

Functions¶

transform(profile: DataFrame[Kraken2Profile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given kraken2 profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[Kraken2Profile]`	A taxonomic profile generated by kraken2.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/kraken2/kraken2_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(
    cls, profile: DataFrame[Kraken2Profile]
) -> DataFrame[StandardProfile]:
    """
    Reduce a kraken2 profile to taxonomy IDs and directly assigned reads.

    Args:
        profile: A taxonomic profile generated by kraken2.

    Returns:
        A standardized profile.

    """
    # Source column -> standard column; the keys double as the selection.
    column_map = {
        Kraken2Profile.taxonomy_id: StandardProfile.taxonomy_id,
        Kraken2Profile.direct_assigned_reads: StandardProfile.count,
    }
    return profile[list(column_map)].copy().rename(columns=column_map)

`krakenuniq` ¶

Classes¶

Modules¶

krakenuniq_profile ¶

Provide a description of the KrakenUniq profile format.

Classes¶

KrakenUniqProfile ¶

Bases: pa.SchemaModel

Define the expected KrakenUniq profile format.

Source code in taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile.py

class KrakenUniqProfile(pa.SchemaModel):
    """Describe the columns expected in a KrakenUniq report."""

    # The declaration order below is significant because the schema is `ordered`.
    # Aliases are the literal column headers found in the report.
    percent: Series[float] = pa.Field(alias="%", ge=0.0, le=100.0)
    reads: Series[int] = pa.Field(ge=0)
    tax_reads: Series[int] = pa.Field(alias="taxReads", ge=0)
    kmers: Series[int] = pa.Field(ge=0)
    duplicates: Series[float] = pa.Field(alias="dup", ge=0.0)
    coverage: Series[float] = pa.Field(alias="cov", ge=0.0)
    tax_id: Series[int] = pa.Field(alias="taxID", ge=0)
    rank: Series[pd.CategoricalDtype] = pa.Field()
    tax_name: Series[str] = pa.Field(alias="taxName")

    class Config:
        """Set the validation options of the schema model."""

        # Cast columns to the annotated dtypes before validating.
        coerce = True
        # Require the columns to appear in the declared order.
        ordered = True
        # Reject columns that are not declared in the schema.
        strict = True

Attributes¶

coverage: Series[float] = pa.Field(ge=0.0, alias='cov') class-attribute ¶

duplicates: Series[float] = pa.Field(ge=0.0, alias='dup') class-attribute ¶

kmers: Series[int] = pa.Field(ge=0) class-attribute ¶

percent: Series[float] = pa.Field(ge=0.0, le=100.0, alias='%') class-attribute ¶

rank: Series[pd.CategoricalDtype] = pa.Field() class-attribute ¶

reads: Series[int] = pa.Field(ge=0) class-attribute ¶

tax_id: Series[int] = pa.Field(alias='taxID', ge=0) class-attribute ¶

tax_name: Series[str] = pa.Field(alias='taxName') class-attribute ¶

tax_reads: Series[int] = pa.Field(ge=0, alias='taxReads') class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile.py

class Config:
    """Set the validation options of the schema model."""

    # Cast columns to the annotated dtypes before validating.
    coerce = True
    # Require the columns to appear in the declared order.
    ordered = True
    # Reject columns that are not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

krakenuniq_profile_reader ¶

Provide a reader for KrakenUniq profiles.

Attributes¶ Classes¶

KrakenUniqProfileReader ¶

Bases: ProfileReader

Define a reader for KrakenUniq profiles.

Source code in taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_reader.py

class KrakenUniqProfileReader(ProfileReader):
    """Read KrakenUniq reports into data frames."""

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[KrakenUniqProfile]:
        """
        Parse a KrakenUniq report from the given source.

        Args:
            profile: A path or buffer holding the tab-separated report written by
                KrakenUniq.

        Returns:
            A data frame representation of the KrakenUniq profile.

        """
        # The first three lines are skipped; the line after them is the header.
        table = pd.read_table(
            profile,
            sep="\t",
            header=0,
            skiprows=3,
            skipinitialspace=True,
            index_col=False,
        )
        # Helper inherited from `ProfileReader` (defined outside this listing).
        cls._check_num_columns(table, KrakenUniqProfile)
        return table

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[KrakenUniqProfile] classmethod ¶

Read a KrakenUniq taxonomic profile from the given source.

Parameters:

Name	Type	Description	Default
`profile`	`BufferOrFilepath`	A source that contains a tab-separated taxonomic profile generated by KrakenUniq.	required

Returns:

Type	Description
`DataFrame[KrakenUniqProfile]`	A data frame representation of the KrakenUniq profile.

Source code in taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[KrakenUniqProfile]:
    """
    Parse a KrakenUniq report from the given source.

    Args:
        profile: A path or buffer holding the tab-separated report written by
            KrakenUniq.

    Returns:
        A data frame representation of the KrakenUniq profile.

    """
    # The first three lines are skipped; the line after them is the header.
    table = pd.read_table(
        profile,
        sep="\t",
        header=0,
        skiprows=3,
        skipinitialspace=True,
        index_col=False,
    )
    # Helper inherited from `ProfileReader` (defined outside this listing).
    cls._check_num_columns(table, KrakenUniqProfile)
    return table

krakenuniq_profile_standardisation_service ¶

Provide a standardisation service for KrakenUniq profiles.

Classes¶

KrakenUniqProfileStandardisationService ¶

Define a standardisation service for KrakenUniq profiles.

Source code in taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_standardisation_service.py

class KrakenUniqProfileStandardisationService(ProfileStandardisationService):
    """Standardise KrakenUniq profiles into the common profile layout."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(
        cls, profile: DataFrame[KrakenUniqProfile]
    ) -> DataFrame[StandardProfile]:
        """
        Reduce a KrakenUniq profile to taxonomy IDs and their `taxReads` counts.

        Args:
            profile: A taxonomic profile generated by KrakenUniq.

        Returns:
            A standardized profile.

        """
        # Source column -> standard column; the keys double as the selection.
        column_map = {
            KrakenUniqProfile.tax_id: StandardProfile.taxonomy_id,
            KrakenUniqProfile.tax_reads: StandardProfile.count,
        }
        return profile[list(column_map)].copy().rename(columns=column_map)

Functions¶

transform(profile: DataFrame[KrakenUniqProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given KrakenUniq profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[KrakenUniqProfile]`	A taxonomic profile generated by KrakenUniq.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(
    cls, profile: DataFrame[KrakenUniqProfile]
) -> DataFrame[StandardProfile]:
    """
    Reduce a KrakenUniq profile to taxonomy IDs and their `taxReads` counts.

    Args:
        profile: A taxonomic profile generated by KrakenUniq.

    Returns:
        A standardized profile.

    """
    # Source column -> standard column; the keys double as the selection.
    column_map = {
        KrakenUniqProfile.tax_id: StandardProfile.taxonomy_id,
        KrakenUniqProfile.tax_reads: StandardProfile.count,
    }
    return profile[list(column_map)].copy().rename(columns=column_map)

`megan6` ¶

Classes¶

Modules¶

megan6_profile ¶

Provide a description of the MEGAN6 rma2info profile format.

Classes¶

Megan6Profile ¶

Bases: pa.SchemaModel

Define the expected MEGAN6 rma2info profile format.

Source code in taxpasta/infrastructure/application/megan6/megan6_profile.py

class Megan6Profile(pa.SchemaModel):
    """Describe the two columns expected in a MEGAN6 rma2info profile."""

    # The declaration order below is significant because the schema is `ordered`.
    # Both columns must hold non-negative integers.
    taxonomy_id: Series[int] = pa.Field(ge=0)
    count: Series[int] = pa.Field(ge=0.0)

    class Config:
        """Set the validation options of the schema model."""

        # Cast columns to the annotated dtypes before validating.
        coerce = True
        # Require the columns to appear in the declared order.
        ordered = True
        # Reject columns that are not declared in the schema.
        strict = True

Attributes¶

count: Series[int] = pa.Field(ge=0.0) class-attribute ¶

taxonomy_id: Series[int] = pa.Field(ge=0) class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/megan6/megan6_profile.py

class Config:
    """Set the validation options of the schema model."""

    # Cast columns to the annotated dtypes before validating.
    coerce = True
    # Require the columns to appear in the declared order.
    ordered = True
    # Reject columns that are not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

megan6_profile_reader ¶

Provide a reader for megan6 profiles.

Attributes¶ Classes¶

Megan6ProfileReader ¶

Bases: ProfileReader

Define a reader for MEGAN6 rma2info profiles.

Source code in taxpasta/infrastructure/application/megan6/megan6_profile_reader.py

class Megan6ProfileReader(ProfileReader):
    """Read MEGAN6 rma2info profiles into data frames."""

    # NOTE(review): not referenced anywhere in this class; kept because it is a
    #  public class attribute.
    LARGE_INTEGER = int(10e6)

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[Megan6Profile]:
        """Parse a tab-separated MEGAN6 rma2info profile from a path or buffer."""
        # The column names are supplied explicitly rather than read from the file.
        column_names = [Megan6Profile.taxonomy_id, Megan6Profile.count]
        table = pd.read_table(
            profile,
            sep="\t",
            index_col=False,
            names=column_names,
        )
        # Helper inherited from `ProfileReader` (defined outside this listing).
        cls._check_num_columns(table, Megan6Profile)
        return table

Attributes¶

LARGE_INTEGER = int(10000000.0) class-attribute ¶

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[Megan6Profile] classmethod ¶

Read a MEGAN6 rma2info taxonomic profile from a file.

Source code in taxpasta/infrastructure/application/megan6/megan6_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[Megan6Profile]:
    """Parse a tab-separated MEGAN6 rma2info profile from a path or buffer."""
    # The column names are supplied explicitly rather than read from the file.
    column_names = [Megan6Profile.taxonomy_id, Megan6Profile.count]
    table = pd.read_table(
        profile,
        sep="\t",
        index_col=False,
        names=column_names,
    )
    # Helper inherited from `ProfileReader` (defined outside this listing).
    cls._check_num_columns(table, Megan6Profile)
    return table

megan6_profile_standardisation_service ¶

Provide a standardisation service for megan6 profiles.

Classes¶

Megan6ProfileStandardisationService ¶

Define a standardisation service for megan6 profiles.

Source code in taxpasta/infrastructure/application/megan6/megan6_profile_standardisation_service.py

class Megan6ProfileStandardisationService(ProfileStandardisationService):
    """Define a standardisation service for megan6 profiles."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(cls, profile: DataFrame[Megan6Profile]) -> DataFrame[StandardProfile]:
        """
        Tidy up and standardize a given MEGAN6 rma2info profile.

        Args:
            profile: A taxonomic profile generated by MEGAN6 rma2info.

        Returns:
            A standardized profile.

        """
        return (
            profile[[Megan6Profile.taxonomy_id, Megan6Profile.count]]
            .copy()
            .rename(
                columns={
                    Megan6Profile.taxonomy_id: StandardProfile.taxonomy_id,
                    Megan6Profile.count: StandardProfile.count,
                }
            )
        )

Functions¶

transform(profile: DataFrame[Megan6Profile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given MEGAN6 rma2info profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[Megan6Profile]`	A taxonomic profile generated by MEGAN6 rma2info.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/megan6/megan6_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(cls, profile: DataFrame[Megan6Profile]) -> DataFrame[StandardProfile]:
    """
    Tidy up and standardize a given MEGAN6 rma2info profile.

    Args:
        profile: A taxonomic profile generated by MEGAN6 rma2info.

    Returns:
        A standardized profile.

    """
    return (
        profile[[Megan6Profile.taxonomy_id, Megan6Profile.count]]
        .copy()
        .rename(
            columns={
                Megan6Profile.taxonomy_id: StandardProfile.taxonomy_id,
                Megan6Profile.count: StandardProfile.count,
            }
        )
    )

`metaphlan` ¶

Classes¶

Modules¶

metaphlan_profile ¶

Provide a description of the metaphlan profile format.

Classes¶

MetaphlanProfile ¶

Bases: pa.SchemaModel

Define the expected metaphlan profile format.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile.py

class MetaphlanProfile(pa.SchemaModel):
    """Describe the columns expected in a metaphlan profile."""

    # The declaration order below is significant because the schema is `ordered`.
    clade_name: Series[str] = pa.Field()
    # MetaPhlan provides the full lineage of tax IDs in this field.
    ncbi_tax_id: Series[str] = pa.Field(alias="NCBI_tax_id")
    relative_abundance: Series[float] = pa.Field(le=100.0, ge=0.0)
    # This column may be absent and, when present, may contain missing values.
    additional_species: Optional[Series[str]] = pa.Field(nullable=True)

    @pa.dataframe_check
    @classmethod
    def check_compositionality(cls, profile: pd.DataFrame) -> bool:
        """Verify that the abundances of every rank sum to roughly a hundred."""
        # The last lineage element names the clade; its first character is taken
        # as the rank.
        clade = profile[cls.clade_name].str.rsplit("|", n=1).str[-1]
        rank = clade.str[0]
        abundance_per_rank = profile.groupby(rank, sort=False)[
            cls.relative_abundance
        ].sum()
        return np.allclose(abundance_per_rank, 100.0, atol=1.0)

    class Config:
        """Set the validation options of the schema model."""

        # Cast columns to the annotated dtypes before validating.
        coerce = True
        # Require the columns to appear in the declared order.
        ordered = True
        # Reject columns that are not declared in the schema.
        strict = True

Attributes¶

additional_species: Optional[Series[str]] = pa.Field(nullable=True) class-attribute ¶

clade_name: Series[str] = pa.Field() class-attribute ¶

ncbi_tax_id: Series[str] = pa.Field(alias='NCBI_tax_id') class-attribute ¶

relative_abundance: Series[float] = pa.Field(ge=0.0, le=100.0) class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile.py

class Config:
    """Set the validation options of the schema model."""

    # Cast columns to the annotated dtypes before validating.
    coerce = True
    # Require the columns to appear in the declared order.
    ordered = True
    # Reject columns that are not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

Functions¶

check_compositionality(profile: pd.DataFrame) -> bool classmethod ¶

Check that the percentages per rank add up to a hundred.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile.py

@pa.dataframe_check
@classmethod
def check_compositionality(cls, profile: pd.DataFrame) -> bool:
    """Verify that the abundances of every rank sum to roughly a hundred."""
    # The last lineage element names the clade; its first character is taken
    # as the rank.
    clade = profile[cls.clade_name].str.rsplit("|", n=1).str[-1]
    rank = clade.str[0]
    abundance_per_rank = profile.groupby(rank, sort=False)[
        cls.relative_abundance
    ].sum()
    return np.allclose(abundance_per_rank, 100.0, atol=1.0)

metaphlan_profile_reader ¶

Provide a reader for metaphlan profiles.

Attributes¶ Classes¶

MetaphlanProfileReader ¶

Bases: ProfileReader

Define a reader for Metaphlan profiles.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile_reader.py

class MetaphlanProfileReader(ProfileReader):
    """Read metaphlan profiles into data frames."""

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[MetaphlanProfile]:
        """Parse a tab-separated metaphlan profile from a path or buffer."""
        column_names = [
            MetaphlanProfile.clade_name,
            MetaphlanProfile.ncbi_tax_id,
            MetaphlanProfile.relative_abundance,
            MetaphlanProfile.additional_species,
        ]
        # The first four lines are skipped and the names are supplied explicitly.
        # The tax ID column is kept as text.
        table = pd.read_table(
            profile,
            sep="\t",
            header=None,
            skiprows=4,
            names=column_names,
            dtype={MetaphlanProfile.ncbi_tax_id: str},
            index_col=False,
        )
        # Helper inherited from `ProfileReader` (defined outside this listing).
        cls._check_num_columns(table, MetaphlanProfile)
        return table

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[MetaphlanProfile] classmethod ¶

Read a metaphlan taxonomic profile from a file.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[MetaphlanProfile]:
    """Parse a tab-separated metaphlan profile from a path or buffer."""
    column_names = [
        MetaphlanProfile.clade_name,
        MetaphlanProfile.ncbi_tax_id,
        MetaphlanProfile.relative_abundance,
        MetaphlanProfile.additional_species,
    ]
    # The first four lines are skipped and the names are supplied explicitly.
    # The tax ID column is kept as text.
    table = pd.read_table(
        profile,
        sep="\t",
        header=None,
        skiprows=4,
        names=column_names,
        dtype={MetaphlanProfile.ncbi_tax_id: str},
        index_col=False,
    )
    # Helper inherited from `ProfileReader` (defined outside this listing).
    cls._check_num_columns(table, MetaphlanProfile)
    return table

metaphlan_profile_standardisation_service ¶

Provide a standardisation service for metaphlan profiles.

Classes¶

MetaphlanProfileStandardisationService ¶

Define a standardisation service for metaphlan profiles.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile_standardisation_service.py

class MetaphlanProfileStandardisationService(ProfileStandardisationService):
    """Define a standardisation service for metaphlan profiles."""

    # Metaphlan only reports up to six decimals so this number should be large enough.
    LARGE_INTEGER = int(1e6)

    @classmethod
    @pa.check_types(lazy=True)
    def transform(
        cls, profile: DataFrame[MetaphlanProfile]
    ) -> DataFrame[StandardProfile]:
        """
        Tidy up and standardize a given metaphlan profile.

        The taxonomy ID is taken from the last element of the tax ID lineage. The
        relative abundance is scaled by `LARGE_INTEGER` and converted to an integer
        count.

        Args:
            profile: A taxonomic profile generated by metaphlan.

        Returns:
            A standardized profile.

        """
        return (
            profile[[MetaphlanProfile.ncbi_tax_id, MetaphlanProfile.relative_abundance]]
            .copy()
            .rename(
                columns={
                    MetaphlanProfile.ncbi_tax_id: StandardProfile.taxonomy_id,
                    MetaphlanProfile.relative_abundance: StandardProfile.count,
                }
            )
            .assign(
                **{
                    # Keep only the most specific (last) tax ID of the lineage.
                    StandardProfile.taxonomy_id: lambda df: df[
                        StandardProfile.taxonomy_id
                    ]
                    .str.rsplit("|", n=1)
                    .str[-1]
                    .astype(int),
                    StandardProfile.count: lambda df: df[StandardProfile.count]
                    * cls.LARGE_INTEGER,
                }
            )
            .assign(
                **{
                    # Round before casting: the cast alone truncates, so a scaled
                    # value that floating point error leaves just below an integer
                    # would otherwise lose one count.
                    StandardProfile.count: lambda df: df[StandardProfile.count]
                    .round()
                    .astype(int)
                }
            )
        )

Attributes¶

LARGE_INTEGER = int(1000000.0) class-attribute ¶

Functions¶

transform(profile: DataFrame[MetaphlanProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given metaphlan profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[MetaphlanProfile]`	A taxonomic profile generated by metaphlan.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/metaphlan/metaphlan_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(
    cls, profile: DataFrame[MetaphlanProfile]
) -> DataFrame[StandardProfile]:
    """
    Tidy up and standardize a given metaphlan profile.

    The taxonomy ID is taken from the last element of the tax ID lineage. The
    relative abundance is scaled by `LARGE_INTEGER` and converted to an integer
    count.

    Args:
        profile: A taxonomic profile generated by metaphlan.

    Returns:
        A standardized profile.

    """
    return (
        profile[[MetaphlanProfile.ncbi_tax_id, MetaphlanProfile.relative_abundance]]
        .copy()
        .rename(
            columns={
                MetaphlanProfile.ncbi_tax_id: StandardProfile.taxonomy_id,
                MetaphlanProfile.relative_abundance: StandardProfile.count,
            }
        )
        .assign(
            **{
                # Keep only the most specific (last) tax ID of the lineage.
                StandardProfile.taxonomy_id: lambda df: df[
                    StandardProfile.taxonomy_id
                ]
                .str.rsplit("|", n=1)
                .str[-1]
                .astype(int),
                StandardProfile.count: lambda df: df[StandardProfile.count]
                * cls.LARGE_INTEGER,
            }
        )
        .assign(
            **{
                # Round before casting: the cast alone truncates, so a scaled
                # value that floating point error leaves just below an integer
                # would otherwise lose one count.
                StandardProfile.count: lambda df: df[StandardProfile.count]
                .round()
                .astype(int)
            }
        )
    )

`motus` ¶

Classes¶

Modules¶

motus_profile ¶

Provide a description of the mOTUs profile format.

Classes¶

MotusProfile ¶

Bases: pa.SchemaModel

Define the expected mOTUs profile format.

Source code in taxpasta/infrastructure/application/motus/motus_profile.py

class MotusProfile(pa.SchemaModel):
    """Describe the columns expected in a mOTUs profile."""

    # The declaration order below is significant because the schema is `ordered`.
    consensus_taxonomy: Series[str] = pa.Field()
    # Pandas cannot handle missing integer values. Thus, we read this column as string.
    ncbi_tax_id: Series[str] = pa.Field(nullable=True)
    # Read counts must not be negative.
    read_count: Series[int] = pa.Field(ge=0)

    class Config:
        """Set the validation options of the schema model."""

        # Cast columns to the annotated dtypes before validating.
        coerce = True
        # Require the columns to appear in the declared order.
        ordered = True
        # Reject columns that are not declared in the schema.
        strict = True

Attributes¶

consensus_taxonomy: Series[str] = pa.Field() class-attribute ¶

ncbi_tax_id: Series[str] = pa.Field(nullable=True) class-attribute ¶

read_count: Series[int] = pa.Field(ge=0) class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/motus/motus_profile.py

class Config:
    """Set the validation options of the schema model."""

    # Cast columns to the annotated dtypes before validating.
    coerce = True
    # Require the columns to appear in the declared order.
    ordered = True
    # Reject columns that are not declared in the schema.
    strict = True

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

motus_profile_reader ¶

Provide a reader for motus profiles.

Attributes¶ Classes¶

MotusProfileReader ¶

Bases: ProfileReader

Define a reader for mOTUs profiles.

Source code in taxpasta/infrastructure/application/motus/motus_profile_reader.py

class MotusProfileReader(ProfileReader):
    """Read mOTUs profiles into data frames."""

    @classmethod
    def read(cls, profile: BufferOrFilepath) -> DataFrame[MotusProfile]:
        """Parse a tab-separated mOTUs profile from a path or buffer."""
        column_names = [
            MotusProfile.consensus_taxonomy,
            MotusProfile.ncbi_tax_id,
            MotusProfile.read_count,
        ]
        # The first three lines are skipped and the names are supplied explicitly.
        # The tax ID column is kept as text.
        table = pd.read_table(
            profile,
            sep="\t",
            header=None,
            skiprows=3,
            names=column_names,
            dtype={MotusProfile.ncbi_tax_id: str},
            index_col=False,
        )
        # Helper inherited from `ProfileReader` (defined outside this listing).
        cls._check_num_columns(table, MotusProfile)
        return table

Functions¶

read(profile: BufferOrFilepath) -> DataFrame[MotusProfile] classmethod ¶

Read a mOTUs taxonomic profile from a file.

Source code in taxpasta/infrastructure/application/motus/motus_profile_reader.py

@classmethod
def read(cls, profile: BufferOrFilepath) -> DataFrame[MotusProfile]:
    """Parse a tab-separated mOTUs profile from a path or buffer."""
    column_names = [
        MotusProfile.consensus_taxonomy,
        MotusProfile.ncbi_tax_id,
        MotusProfile.read_count,
    ]
    # The first three lines are skipped and the names are supplied explicitly.
    # The tax ID column is kept as text.
    table = pd.read_table(
        profile,
        sep="\t",
        header=None,
        skiprows=3,
        names=column_names,
        dtype={MotusProfile.ncbi_tax_id: str},
        index_col=False,
    )
    # Helper inherited from `ProfileReader` (defined outside this listing).
    cls._check_num_columns(table, MotusProfile)
    return table

motus_profile_standardisation_service ¶

Provide a standardisation service for mOTUs profiles.

Classes¶

MotusProfileStandardisationService ¶

Define a standardisation service for mOTUs profiles.

Source code in taxpasta/infrastructure/application/motus/motus_profile_standardisation_service.py

class MotusProfileStandardisationService(ProfileStandardisationService):
    """Standardise mOTUs profiles into the common profile layout."""

    @classmethod
    @pa.check_types(lazy=True)
    def transform(cls, profile: DataFrame[MotusProfile]) -> DataFrame[StandardProfile]:
        """
        Convert a mOTUs profile into the standard profile layout.

        Entries without reads are dropped, the read counts of entries sharing a
        taxonomy ID are summed, and the reads of all entries lacking a taxonomy ID
        are collected in one final row with taxonomy ID 0.

        Args:
            profile: A taxonomic profile generated by mOTUs.

        Returns:
            A standardized profile.

        """
        tax_id_col = StandardProfile.taxonomy_id
        count_col = StandardProfile.count

        # Ignore entries with zero read count and adopt the standard column names.
        has_reads = profile[MotusProfile.read_count] > 0
        counts = profile.loc[
            has_reads, [MotusProfile.ncbi_tax_id, MotusProfile.read_count]
        ].copy()
        counts = counts.rename(
            columns={
                MotusProfile.ncbi_tax_id: tax_id_col,
                MotusProfile.read_count: count_col,
            }
        )

        # Split the entries into those with a known and those with an unknown tax ID.
        is_known = counts[tax_id_col].notnull()
        assigned = counts.loc[is_known, :].copy()
        assigned[tax_id_col] = assigned[tax_id_col].astype(int)
        # FIXME (Moritz): Apparently, mOTUs profiles can contain duplicate tax IDs.
        #  Clarify with Sofia and Maxime. For now, sum up read counts.
        #  https://github.com/taxprofiler/taxpasta/issues/46
        assigned = assigned.groupby(tax_id_col, as_index=False, sort=False).sum()

        # Sum up all remaining read counts without tax ID to be 'unassigned'.
        unassigned = pd.DataFrame(
            {
                tax_id_col: [0],
                count_col: [counts.loc[~is_known, count_col].sum()],
            },
            dtype=int,
        )
        return pd.concat([assigned, unassigned], ignore_index=True)

Functions¶

transform(profile: DataFrame[MotusProfile]) -> DataFrame[StandardProfile] classmethod ¶

Tidy up and standardize a given mOTUs profile.

Parameters:

Name	Type	Description	Default
`profile`	`DataFrame[MotusProfile]`	A taxonomic profile generated by mOTUs.	required

Returns:

Type	Description
`DataFrame[StandardProfile]`	A standardized profile.

Source code in taxpasta/infrastructure/application/motus/motus_profile_standardisation_service.py

@classmethod
@pa.check_types(lazy=True)
def transform(cls, profile: DataFrame[MotusProfile]) -> DataFrame[StandardProfile]:
    """
    Convert a mOTUs profile into the standard profile layout.

    Entries without reads are dropped, the read counts of entries sharing a
    taxonomy ID are summed, and the reads of all entries lacking a taxonomy ID
    are collected in one final row with taxonomy ID 0.

    Args:
        profile: A taxonomic profile generated by mOTUs.

    Returns:
        A standardized profile.

    """
    tax_id_col = StandardProfile.taxonomy_id
    count_col = StandardProfile.count

    # Ignore entries with zero read count and adopt the standard column names.
    has_reads = profile[MotusProfile.read_count] > 0
    counts = profile.loc[
        has_reads, [MotusProfile.ncbi_tax_id, MotusProfile.read_count]
    ].copy()
    counts = counts.rename(
        columns={
            MotusProfile.ncbi_tax_id: tax_id_col,
            MotusProfile.read_count: count_col,
        }
    )

    # Split the entries into those with a known and those with an unknown tax ID.
    is_known = counts[tax_id_col].notnull()
    assigned = counts.loc[is_known, :].copy()
    assigned[tax_id_col] = assigned[tax_id_col].astype(int)
    # FIXME (Moritz): Apparently, mOTUs profiles can contain duplicate tax IDs.
    #  Clarify with Sofia and Maxime. For now, sum up read counts.
    #  https://github.com/taxprofiler/taxpasta/issues/46
    assigned = assigned.groupby(tax_id_col, as_index=False, sort=False).sum()

    # Sum up all remaining read counts without tax ID to be 'unassigned'.
    unassigned = pd.DataFrame(
        {
            tax_id_col: [0],
            count_col: [counts.loc[~is_known, count_col].sum()],
        },
        dtype=int,
    )
    return pd.concat([assigned, unassigned], ignore_index=True)

`sample_etl_application` ¶

Provide a sample ETL application.

Attributes¶

logger = logging.getLogger(__name__) module-attribute ¶

Classes¶

SampleETLApplication ¶

Define the sample ETL application.

Source code in taxpasta/infrastructure/application/sample_etl_application.py

class SampleETLApplication:
    """Define the sample ETL application."""

    def __init__(
        self,
        *,
        profile_reader: Type[ProfileReader],
        profile_standardiser: Type[ProfileStandardisationService],
        **kwargs: dict,
    ):
        """
        Initialize the application for a particular taxonomic profiler.

        Args:
            profile_reader: A profile reader for a specific taxonomic profile format.
            profile_standardiser: A profile standardisation service for a specific
                taxonomic profile format.
            **kwargs: Passed on for inheritance.

        """
        super().__init__(**kwargs)
        self.reader = profile_reader
        self.standardiser = profile_standardiser

    def etl(self, profile: Path, name: Optional[str] = None) -> Sample:
        """
        Extract, transform, and load a profile into a sample.

        Args:
            profile: A taxonomic profile.
            name: An optional name for the sample. Otherwise, the profile's filename is
                used.

        Returns:
            A sample.

        Raises:
            StandardisationError: If the given profile does not match the validation
                schema.  # noqa: DAR402

        """
        if name is None:
            name = profile.stem
        try:
            return Sample(
                name=name,
                profile=self.standardiser.transform(self.reader.read(profile)),
            )
        except SchemaErrors as errors:
            # Report the failure under the resolved sample name so that it matches
            # the sample being built, also when the caller supplied the name.
            raise StandardisationError(
                sample=name, profile=profile, message=str(errors.failure_cases)
            ) from errors

Attributes¶

reader = profile_reader instance-attribute ¶

standardiser = profile_standardiser instance-attribute ¶

Functions¶

__init__(*, profile_reader: Type[ProfileReader], profile_standardiser: Type[ProfileStandardisationService], **kwargs: dict)

¶

Initialize the application for a particular taxonomic profiler.

Parameters:

Name	Type	Description	Default
`profile_reader`	`Type[ProfileReader]`	A profile reader for a specific taxonomic profile format.	required
`profile_standardiser`	`Type[ProfileStandardisationService]`	A profile standardisation service for a specific taxonomic profile format.	required
`**kwargs`	`dict`	Passed on for inheritance.	`{}`

Source code in taxpasta/infrastructure/application/sample_etl_application.py

def __init__(
    self,
    *,
    profile_reader: Type[ProfileReader],
    profile_standardiser: Type[ProfileStandardisationService],
    **kwargs: dict,
):
    """
    Set up the application with its format-specific collaborators.

    Args:
        profile_reader: The reader for a specific taxonomic profile format.
        profile_standardiser: The standardisation service for a specific
            taxonomic profile format.
        **kwargs: Forwarded to the parent initializer.

    """
    super().__init__(**kwargs)
    self.reader, self.standardiser = profile_reader, profile_standardiser

etl(profile: Path, name: Optional[str] = None) -> Sample ¶

Extract, transform, and load a profile into a sample.

Parameters:

Name	Type	Description	Default
`profile`	`Path`	A taxonomic profile.	required
`name`	`Optional[str]`	An optional name for the sample. Otherwise, the profile's filename is used.	`None`

Returns:

Type	Description
`Sample`	A sample.

Raises:

Type	Description
`StandardisationError`	If the given profile does not match the validation schema.

Source code in taxpasta/infrastructure/application/sample_etl_application.py

def etl(self, profile: Path, name: Optional[str] = None) -> Sample:
    """
    Extract, transform, and load a profile into a sample.

    Args:
        profile: A taxonomic profile.
        name: An optional name for the sample. Otherwise, the stem of the
            profile's filename is used.

    Returns:
        A sample.

    Raises:
        StandardisationError: If the given profile does not match the validation
            schema.  # noqa: DAR402

    """
    if name is None:
        name = profile.stem
    try:
        return Sample(
            name=name,
            profile=self.standardiser.transform(self.reader.read(profile)),
        )
    except SchemaErrors as errors:
        # Report the effective sample name so that a caller-supplied name is
        # not replaced by the file stem in the error.
        raise StandardisationError(
            sample=name, profile=profile, message=str(errors.failure_cases)
        ) from errors

`sample_sheet` ¶

Provide a description of samples and profile locations.

Classes¶

SampleSheet ¶

Bases: pa.SchemaModel

Define a description of samples and profile locations.

Source code in taxpasta/infrastructure/application/sample_sheet.py

class SampleSheet(pa.SchemaModel):
    """Describe which samples exist and where their profiles are located."""

    # Name of each sample.
    sample: Series[str] = pa.Field()
    # Location of each sample's profile; validated to be an existing file.
    profile: Series[str] = pa.Field()  # type: ignore

    @pa.dataframe_check
    @classmethod
    def check_number_samples(cls, table: DataFrame) -> bool:
        """Require two or more rows that have both a sample and a profile."""
        complete_rows = table[cls.sample].notnull() & table[cls.profile].notnull()
        return complete_rows.sum() >= 2

    @pa.check("profile", name="profile_presence")
    @classmethod
    def check_profile_presence(
        cls, profile: Series[str]  # type: ignore
    ) -> Series[bool]:
        """Flag, per entry, whether the profile path points to an existing file."""

        def is_existing_file(location):
            return Path(location).is_file()

        return cast(Series[bool], profile.map(is_existing_file))

    class Config:
        """Configure the schema model."""

        coerce = True
        ordered = True
        strict = True

Attributes¶

profile: Series[str] = pa.Field() class-attribute ¶

sample: Series[str] = pa.Field() class-attribute ¶

Classes¶

Config ¶

Configure the schema model.

Source code in taxpasta/infrastructure/application/sample_sheet.py

class Config:
    """Configure the schema model."""

    # NOTE(review): the meanings below follow pandera's documented schema
    # options; confirm against the pinned pandera version.
    coerce = True  # coerce column data types during validation
    ordered = True  # require the columns in the declared order
    strict = True  # reject columns that are not declared in the schema

Attributes¶

coerce = True class-attribute ¶

ordered = True class-attribute ¶

strict = True class-attribute ¶

Functions¶

check_number_samples(table: DataFrame) -> bool classmethod ¶

Check that there are at least two samples.

Source code in taxpasta/infrastructure/application/sample_sheet.py

@pa.dataframe_check
@classmethod
def check_number_samples(cls, table: DataFrame) -> bool:
    """Require two or more rows that have both a sample and a profile."""
    complete_rows = table[cls.sample].notnull() & table[cls.profile].notnull()
    return complete_rows.sum() >= 2

check_profile_presence(profile: Series[str]) -> Series[bool] classmethod ¶

Check that every profile is present at the specified location.

Source code in taxpasta/infrastructure/application/sample_sheet.py

@pa.check("profile", name="profile_presence")
@classmethod
def check_profile_presence(
    cls, profile: Series[str]  # type: ignore
) -> Series[bool]:
    """Flag, per entry, whether the profile path points to an existing file."""

    def is_existing_file(location):
        return Path(location).is_file()

    return cast(Series[bool], profile.map(is_existing_file))

`standard_profile_file_format` ¶

Provide a service for supported tabular file formats.

Classes¶

StandardProfileFileFormat ¶

Bases: str, DependencyCheckMixin, Enum

Define the supported standardized profile file formats.

Source code in taxpasta/infrastructure/application/standard_profile_file_format.py

@unique
class StandardProfileFileFormat(str, DependencyCheckMixin, Enum):
    """
    Define the supported standardized profile file formats.

    Every member's value is identical to its name.

    """

    # Delimited text formats.
    TSV = "TSV"
    CSV = "CSV"
    # Spreadsheet formats.
    ODS = "ODS"
    XLSX = "XLSX"
    # Binary formats; note that these member names are lower case.
    arrow = "arrow"
    parquet = "parquet"

Attributes¶

CSV = 'CSV' class-attribute ¶

ODS = 'ODS' class-attribute ¶

TSV = 'TSV' class-attribute ¶

XLSX = 'XLSX' class-attribute ¶

arrow = 'arrow' class-attribute ¶

parquet = 'parquet' class-attribute ¶

`standard_profile_writer` ¶

Modules¶

arrow_standard_profile_writer ¶

Provide an arrow writer.

Attributes¶ Classes¶

ArrowStandardProfileWriter ¶

Define the arrow writer.

Source code in taxpasta/infrastructure/application/standard_profile_writer/arrow_standard_profile_writer.py

class ArrowStandardProfileWriter(StandardProfileWriter):
    """Write standardized profiles in the arrow (feather) format."""

    @classmethod
    def write(
        cls,
        profile: DataFrame[StandardProfile],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a standardized profile to the target in feather format.

        Args:
            profile: The standardized profile to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_feather` method.

        """
        profile.to_feather(target, **kwargs)

Functions¶

write(profile: DataFrame[StandardProfile], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given standardized profile to the given buffer or file.

Source code in taxpasta/infrastructure/application/standard_profile_writer/arrow_standard_profile_writer.py

@classmethod
def write(
    cls,
    profile: DataFrame[StandardProfile],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a standardized profile to the target in feather format.

    Args:
        profile: The standardized profile to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_feather` method.

    """
    profile.to_feather(target, **kwargs)

csv_standard_profile_writer ¶

Provide a CSV writer.

Attributes¶ Classes¶

CSVStandardProfileWriter ¶

Define the CSV writer.

Source code in taxpasta/infrastructure/application/standard_profile_writer/csv_standard_profile_writer.py

class CSVStandardProfileWriter(StandardProfileWriter):
    """Write standardized profiles as comma-separated values."""

    @classmethod
    def write(
        cls,
        profile: DataFrame[StandardProfile],
        target: BufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a standardized profile to the target as CSV without the row index.

        Args:
            profile: The standardized profile to write.
            target: The buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_csv` method.

        """
        profile.to_csv(target, index=False, **kwargs)

Functions¶

write(profile: DataFrame[StandardProfile], target: BufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given standardized profile to the given buffer or file.

Source code in taxpasta/infrastructure/application/standard_profile_writer/csv_standard_profile_writer.py

@classmethod
def write(
    cls,
    profile: DataFrame[StandardProfile],
    target: BufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a standardized profile to the target as CSV without the row index.

    Args:
        profile: The standardized profile to write.
        target: The buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_csv` method.

    """
    profile.to_csv(target, index=False, **kwargs)

ods_standard_profile_writer ¶

Provide an ODS writer.

Attributes¶ Classes¶

ODSStandardProfileWriter ¶

Define the ODS writer.

Source code in taxpasta/infrastructure/application/standard_profile_writer/ods_standard_profile_writer.py

class ODSStandardProfileWriter(StandardProfileWriter):
    """Write standardized profiles as ODS spreadsheets."""

    @classmethod
    def write(
        cls,
        profile: DataFrame[StandardProfile],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a standardized profile to the target as ODS without the row index.

        Args:
            profile: The standardized profile to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_excel` method, which is
                called with the `odf` engine.

        """
        profile.to_excel(target, index=False, engine="odf", **kwargs)

Functions¶

write(profile: DataFrame[StandardProfile], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given standardized profile to the given buffer or file.

Source code in taxpasta/infrastructure/application/standard_profile_writer/ods_standard_profile_writer.py

@classmethod
def write(
    cls,
    profile: DataFrame[StandardProfile],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a standardized profile to the target as ODS without the row index.

    Args:
        profile: The standardized profile to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_excel` method, which is
            called with the `odf` engine.

    """
    profile.to_excel(target, index=False, engine="odf", **kwargs)

parquet_standard_profile_writer ¶

Provide a parquet writer.

Attributes¶ Classes¶

ParquetStandardProfileWriter ¶

Define the parquet writer.

Source code in taxpasta/infrastructure/application/standard_profile_writer/parquet_standard_profile_writer.py

class ParquetStandardProfileWriter(StandardProfileWriter):
    """Write standardized profiles in the parquet format."""

    @classmethod
    def write(
        cls,
        profile: DataFrame[StandardProfile],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a standardized profile to the target in parquet format.

        Args:
            profile: The standardized profile to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_parquet` method.

        """
        profile.to_parquet(target, **kwargs)

Functions¶

write(profile: DataFrame[StandardProfile], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given standardized profile to the given buffer or file.

Source code in taxpasta/infrastructure/application/standard_profile_writer/parquet_standard_profile_writer.py

@classmethod
def write(
    cls,
    profile: DataFrame[StandardProfile],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a standardized profile to the target in parquet format.

    Args:
        profile: The standardized profile to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_parquet` method.

    """
    profile.to_parquet(target, **kwargs)

tsv_standard_profile_writer ¶

Provide a TSV writer.

Attributes¶ Classes¶

TSVStandardProfileWriter ¶

Define the TSV writer.

Source code in taxpasta/infrastructure/application/standard_profile_writer/tsv_standard_profile_writer.py

class TSVStandardProfileWriter(StandardProfileWriter):
    """Write standardized profiles as tab-separated values."""

    @classmethod
    def write(
        cls,
        profile: DataFrame[StandardProfile],
        target: BufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a standardized profile to the target as TSV without the row index.

        Args:
            profile: The standardized profile to write.
            target: The buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_csv` method, which is
                called with a tab separator.

        """
        profile.to_csv(target, sep="\t", index=False, **kwargs)

Functions¶

write(profile: DataFrame[StandardProfile], target: BufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given standardized profile to the given buffer or file.

Source code in taxpasta/infrastructure/application/standard_profile_writer/tsv_standard_profile_writer.py

@classmethod
def write(
    cls,
    profile: DataFrame[StandardProfile],
    target: BufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a standardized profile to the target as TSV without the row index.

    Args:
        profile: The standardized profile to write.
        target: The buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_csv` method, which is
            called with a tab separator.

    """
    profile.to_csv(target, sep="\t", index=False, **kwargs)

xlsx_standard_profile_writer ¶

Provide an XLSX writer.

Attributes¶ Classes¶

XLSXStandardProfileWriter ¶

Define the XLSX writer.

Source code in taxpasta/infrastructure/application/standard_profile_writer/xlsx_standard_profile_writer.py

class XLSXStandardProfileWriter(StandardProfileWriter):
    """Write standardized profiles as XLSX spreadsheets."""

    @classmethod
    def write(
        cls,
        profile: DataFrame[StandardProfile],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a standardized profile to the target as XLSX without the row index.

        Args:
            profile: The standardized profile to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_excel` method, which is
                called with the `openpyxl` engine.

        """
        profile.to_excel(target, index=False, engine="openpyxl", **kwargs)

Functions¶

write(profile: DataFrame[StandardProfile], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given standardized profile to the given buffer or file.

Source code in taxpasta/infrastructure/application/standard_profile_writer/xlsx_standard_profile_writer.py

@classmethod
def write(
    cls,
    profile: DataFrame[StandardProfile],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a standardized profile to the target as XLSX without the row index.

    Args:
        profile: The standardized profile to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_excel` method, which is
            called with the `openpyxl` engine.

    """
    profile.to_excel(target, index=False, engine="openpyxl", **kwargs)

`supported_profiler` ¶

Provide an enumeration of supported taxonomic profilers.

Classes¶

SupportedProfiler ¶

Bases: str, Enum

Define supported taxonomic profilers.

Source code in taxpasta/infrastructure/application/supported_profiler.py

@unique
class SupportedProfiler(str, Enum):
    """
    Define supported taxonomic profilers.

    Every member's value is identical to its name, and members are listed in
    alphabetical order.

    """

    bracken = "bracken"
    centrifuge = "centrifuge"
    diamond = "diamond"
    kaiju = "kaiju"
    kraken2 = "kraken2"
    krakenuniq = "krakenuniq"
    megan6 = "megan6"
    metaphlan = "metaphlan"
    motus = "motus"

Attributes¶

bracken = 'bracken' class-attribute ¶

centrifuge = 'centrifuge' class-attribute ¶

diamond = 'diamond' class-attribute ¶

kaiju = 'kaiju' class-attribute ¶

kraken2 = 'kraken2' class-attribute ¶

krakenuniq = 'krakenuniq' class-attribute ¶

megan6 = 'megan6' class-attribute ¶

metaphlan = 'metaphlan' class-attribute ¶

motus = 'motus' class-attribute ¶

`table_reader` ¶

Modules¶

arrow_table_reader ¶

Provide an arrow reader.

Attributes¶ Classes¶

ArrowTableReader ¶

Bases: TableReader

Define the arrow reader.

Source code in taxpasta/infrastructure/application/table_reader/arrow_table_reader.py

class ArrowTableReader(TableReader):
    """Read tables stored in the arrow (feather) format."""

    @classmethod
    def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Load a feather table into a data frame.

        Args:
            source: The binary buffer or file path to read from.
            **kwargs: Passed on to `pandas.read_feather`.

        Returns:
            The table that was read.

        """
        return pd.read_feather(source, **kwargs)

Functions¶

read(source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame classmethod ¶

Read arrow from the given source.

Source code in taxpasta/infrastructure/application/table_reader/arrow_table_reader.py

@classmethod
def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Load a feather table into a data frame.

    Args:
        source: The binary buffer or file path to read from.
        **kwargs: Passed on to `pandas.read_feather`.

    Returns:
        The table that was read.

    """
    return pd.read_feather(source, **kwargs)

csv_table_reader ¶

Provide a CSV reader.

Attributes¶ Classes¶

CSVTableReader ¶

Bases: TableReader

Define the CSV reader.

Source code in taxpasta/infrastructure/application/table_reader/csv_table_reader.py

class CSVTableReader(TableReader):
    """Read tables stored as comma-separated values."""

    @classmethod
    def read(cls, source: BufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Load a CSV table into a data frame.

        Args:
            source: The buffer or file path to read from.
            **kwargs: Passed on to `pandas.read_csv`.

        Returns:
            The table that was read.

        """
        return pd.read_csv(source, **kwargs)

Functions¶

read(source: BufferOrFilepath, **kwargs) -> pd.DataFrame classmethod ¶

Read CSV from the given source.

Source code in taxpasta/infrastructure/application/table_reader/csv_table_reader.py

@classmethod
def read(cls, source: BufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Load a CSV table into a data frame.

    Args:
        source: The buffer or file path to read from.
        **kwargs: Passed on to `pandas.read_csv`.

    Returns:
        The table that was read.

    """
    return pd.read_csv(source, **kwargs)

ods_table_reader ¶

Provide an ODS reader.

Attributes¶ Classes¶

ODSTableReader ¶

Bases: TableReader

Define the ODS reader.

Source code in taxpasta/infrastructure/application/table_reader/ods_table_reader.py

class ODSTableReader(TableReader):
    """Read tables stored as ODS spreadsheets."""

    @classmethod
    def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Load an ODS spreadsheet into a data frame.

        Args:
            source: The binary buffer or file path to read from.
            **kwargs: Passed on to `pandas.read_excel`, which is called with the
                `odf` engine.

        Returns:
            The table that was read.

        """
        return pd.read_excel(source, engine="odf", **kwargs)

Functions¶

read(source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame classmethod ¶

Read ODS from the given source.

Source code in taxpasta/infrastructure/application/table_reader/ods_table_reader.py

@classmethod
def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Load an ODS spreadsheet into a data frame.

    Args:
        source: The binary buffer or file path to read from.
        **kwargs: Passed on to `pandas.read_excel`, which is called with the
            `odf` engine.

    Returns:
        The table that was read.

    """
    return pd.read_excel(source, engine="odf", **kwargs)

parquet_table_reader ¶

Provide a parquet reader.

Attributes¶ Classes¶

ParquetTableReader ¶

Bases: TableReader

Define the parquet reader.

Source code in taxpasta/infrastructure/application/table_reader/parquet_table_reader.py

class ParquetTableReader(TableReader):
    """Read tables stored in the parquet format."""

    @classmethod
    def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Load a parquet table into a data frame.

        Args:
            source: The binary buffer or file path to read from.
            **kwargs: Passed on to `pandas.read_parquet`.

        Returns:
            The table that was read.

        """
        return pd.read_parquet(source, **kwargs)

Functions¶

read(source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame classmethod ¶

Read parquet from the given source.

Source code in taxpasta/infrastructure/application/table_reader/parquet_table_reader.py

@classmethod
def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Load a parquet table into a data frame.

    Args:
        source: The binary buffer or file path to read from.
        **kwargs: Passed on to `pandas.read_parquet`.

    Returns:
        The table that was read.

    """
    return pd.read_parquet(source, **kwargs)

tsv_table_reader ¶

Provide a TSV reader.

Attributes¶ Classes¶

TSVTableReader ¶

Bases: TableReader

Define the TSV reader.

Source code in taxpasta/infrastructure/application/table_reader/tsv_table_reader.py

class TSVTableReader(TableReader):
    """Read tables stored as tab-separated values."""

    @classmethod
    def read(cls, source: BufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Load a TSV table into a data frame.

        Args:
            source: The buffer or file path to read from.
            **kwargs: Passed on to `pandas.read_table`, which is called with a
                tab separator.

        Returns:
            The table that was read.

        """
        return pd.read_table(source, sep="\t", **kwargs)

Functions¶

read(source: BufferOrFilepath, **kwargs) -> pd.DataFrame classmethod ¶

Read TSV from the given source.

Source code in taxpasta/infrastructure/application/table_reader/tsv_table_reader.py

@classmethod
def read(cls, source: BufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Load a TSV table into a data frame.

    Args:
        source: The buffer or file path to read from.
        **kwargs: Passed on to `pandas.read_table`, which is called with a
            tab separator.

    Returns:
        The table that was read.

    """
    return pd.read_table(source, sep="\t", **kwargs)

xlsx_table_reader ¶

Provide an XLSX reader.

Attributes¶ Classes¶

XLSXTableReader ¶

Bases: TableReader

Define the XLSX reader.

Source code in taxpasta/infrastructure/application/table_reader/xlsx_table_reader.py

class XLSXTableReader(TableReader):
    """Read tables stored as XLSX spreadsheets."""

    @classmethod
    def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
        """
        Load an XLSX spreadsheet into a data frame.

        Args:
            source: The binary buffer or file path to read from.
            **kwargs: Passed on to `pandas.read_excel`, which is called with the
                `openpyxl` engine.

        Returns:
            The table that was read.

        """
        return pd.read_excel(source, engine="openpyxl", **kwargs)

Functions¶

read(source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame classmethod ¶

Read XLSX from the given source.

Source code in taxpasta/infrastructure/application/table_reader/xlsx_table_reader.py

@classmethod
def read(cls, source: BinaryBufferOrFilepath, **kwargs) -> pd.DataFrame:
    """
    Load an XLSX spreadsheet into a data frame.

    Args:
        source: The binary buffer or file path to read from.
        **kwargs: Passed on to `pandas.read_excel`, which is called with the
            `openpyxl` engine.

    Returns:
        The table that was read.

    """
    return pd.read_excel(source, engine="openpyxl", **kwargs)

`table_reader_file_format` ¶

Provide a service for supported tabular file formats.

Classes¶

TableReaderFileFormat ¶

Bases: str, DependencyCheckMixin, Enum

Define the supported tabular file formats.

Source code in taxpasta/infrastructure/application/table_reader_file_format.py

@unique
class TableReaderFileFormat(str, DependencyCheckMixin, Enum):
    """
    Define the supported tabular file formats.

    Every member's value is identical to its name.

    """

    # Delimited text formats.
    TSV = "TSV"
    CSV = "CSV"
    # Spreadsheet formats.
    ODS = "ODS"
    XLSX = "XLSX"
    # Binary formats; note that these member names are lower case.
    arrow = "arrow"
    parquet = "parquet"

Attributes¶

CSV = 'CSV' class-attribute ¶

ODS = 'ODS' class-attribute ¶

TSV = 'TSV' class-attribute ¶

XLSX = 'XLSX' class-attribute ¶

arrow = 'arrow' class-attribute ¶

parquet = 'parquet' class-attribute ¶

`tidy_observation_table_file_format` ¶

Provide a service for supported tabular file formats.

Classes¶

TidyObservationTableFileFormat ¶

Bases: str, DependencyCheckMixin, Enum

Define the supported tabular file formats.

Source code in taxpasta/infrastructure/application/tidy_observation_table_file_format.py

@unique
class TidyObservationTableFileFormat(str, DependencyCheckMixin, Enum):
    """
    Define the supported tabular file formats.

    Every member's value is identical to its name.

    """

    # Delimited text formats.
    TSV = "TSV"
    CSV = "CSV"
    # Spreadsheet formats.
    ODS = "ODS"
    XLSX = "XLSX"
    # Binary formats; note that these member names are lower case.
    arrow = "arrow"
    parquet = "parquet"

Attributes¶

CSV = 'CSV' class-attribute ¶

ODS = 'ODS' class-attribute ¶

TSV = 'TSV' class-attribute ¶

XLSX = 'XLSX' class-attribute ¶

arrow = 'arrow' class-attribute ¶

parquet = 'parquet' class-attribute ¶

`tidy_observation_table_writer` ¶

Modules¶

arrow_table_writer ¶

Provide an arrow writer.

Attributes¶ Classes¶

ArrowTidyObservationTableWriter ¶

Define the arrow writer.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/arrow_table_writer.py

class ArrowTidyObservationTableWriter(TidyObservationTableWriter):
    """Write tidy observation tables in the arrow (feather) format."""

    @classmethod
    def write(
        cls,
        table: DataFrame[TidyObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a tidy observation table to the target in feather format.

        Args:
            table: The tidy observation table to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_feather` method.

        """
        table.to_feather(target, **kwargs)

Functions¶

write(table: DataFrame[TidyObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/arrow_table_writer.py

@classmethod
def write(
    cls,
    table: DataFrame[TidyObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a tidy observation table to the target in feather format.

    Args:
        table: The tidy observation table to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_feather` method.

    """
    table.to_feather(target, **kwargs)

csv_table_writer ¶

Provide a CSV writer.

Attributes¶ Classes¶

CSVTidyObservationTableWriter ¶

Define the CSV writer.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/csv_table_writer.py

class CSVTidyObservationTableWriter(TidyObservationTableWriter):
    """Write tidy observation tables as comma-separated values."""

    @classmethod
    def write(
        cls,
        table: DataFrame[TidyObservationTable],
        target: BufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a tidy observation table to the target as CSV without the row index.

        Args:
            table: The tidy observation table to write.
            target: The buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_csv` method.

        """
        table.to_csv(target, index=False, **kwargs)

Functions¶

write(table: DataFrame[TidyObservationTable], target: BufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/csv_table_writer.py

@classmethod
def write(
    cls,
    table: DataFrame[TidyObservationTable],
    target: BufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a tidy observation table to the target as CSV without the row index.

    Args:
        table: The tidy observation table to write.
        target: The buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_csv` method.

    """
    table.to_csv(target, index=False, **kwargs)

ods_table_writer ¶

Provide an ODS writer.

Attributes¶ Classes¶

ODSTidyObservationTableWriter ¶

Define the ODS writer.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/ods_table_writer.py

class ODSTidyObservationTableWriter(TidyObservationTableWriter):
    """Write tidy observation tables as ODS spreadsheets."""

    @classmethod
    def write(
        cls,
        table: DataFrame[TidyObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a tidy observation table to the target as ODS without the row index.

        Args:
            table: The tidy observation table to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_excel` method, which is
                called with the `odf` engine.

        """
        table.to_excel(target, index=False, engine="odf", **kwargs)

Functions¶

write(table: DataFrame[TidyObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/ods_table_writer.py

@classmethod
def write(
    cls,
    table: DataFrame[TidyObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a tidy observation table to the target as ODS without the row index.

    Args:
        table: The tidy observation table to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_excel` method, which is
            called with the `odf` engine.

    """
    table.to_excel(target, index=False, engine="odf", **kwargs)

parquet_table_writer ¶

Provide a parquet writer.

Attributes¶ Classes¶

ParquetTidyObservationTableWriter ¶

Define the parquet writer.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/parquet_table_writer.py

class ParquetTidyObservationTableWriter(TidyObservationTableWriter):
    """Write tidy observation tables in the parquet format."""

    @classmethod
    def write(
        cls,
        table: DataFrame[TidyObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a tidy observation table to the target in parquet format.

        Args:
            table: The tidy observation table to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_parquet` method.

        """
        table.to_parquet(target, **kwargs)

Functions¶

write(table: DataFrame[TidyObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/parquet_table_writer.py

@classmethod
def write(
    cls,
    table: DataFrame[TidyObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a tidy observation table to the target in parquet format.

    Args:
        table: The tidy observation table to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_parquet` method.

    """
    table.to_parquet(target, **kwargs)

tsv_table_writer ¶

Provide a TSV writer.

Attributes¶ Classes¶

TSVTidyObservationTableWriter ¶

Define the TSV writer.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/tsv_table_writer.py

class TSVTidyObservationTableWriter(TidyObservationTableWriter):
    """Write tidy observation tables as tab-separated values."""

    @classmethod
    def write(
        cls,
        table: DataFrame[TidyObservationTable],
        target: BufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a tidy observation table to the target as TSV without the row index.

        Args:
            table: The tidy observation table to write.
            target: The buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_csv` method, which is
                called with a tab separator.

        """
        table.to_csv(target, sep="\t", index=False, **kwargs)

Functions¶

write(table: DataFrame[TidyObservationTable], target: BufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/tsv_table_writer.py

@classmethod
def write(
    cls,
    table: DataFrame[TidyObservationTable],
    target: BufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a tidy observation table to the target as TSV without the row index.

    Args:
        table: The tidy observation table to write.
        target: The buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_csv` method, which is
            called with a tab separator.

    """
    table.to_csv(target, sep="\t", index=False, **kwargs)

xlsx_table_writer ¶

Provide an XLSX writer.

Attributes¶ Classes¶

XLSXTidyObservationTableWriter ¶

Define the XLSX writer.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/xlsx_table_writer.py

class XLSXTidyObservationTableWriter(TidyObservationTableWriter):
    """Write tidy observation tables as XLSX spreadsheets."""

    @classmethod
    def write(
        cls,
        table: DataFrame[TidyObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a tidy observation table to the target as XLSX without the row index.

        Args:
            table: The tidy observation table to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_excel` method, which is
                called with the `openpyxl` engine.

        """
        table.to_excel(target, index=False, engine="openpyxl", **kwargs)

Functions¶

write(table: DataFrame[TidyObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/tidy_observation_table_writer/xlsx_table_writer.py

@classmethod
def write(
    cls,
    table: DataFrame[TidyObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a tidy observation table to the target as XLSX without the row index.

    Args:
        table: The tidy observation table to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_excel` method, which is
            called with the `openpyxl` engine.

    """
    table.to_excel(target, index=False, engine="openpyxl", **kwargs)

`wide_observation_table_file_format` ¶

Provide a service for supported container file formats.

Classes¶

WideObservationTableFileFormat ¶

Bases: str, DependencyCheckMixin, Enum

Define the supported container file formats.

Source code in taxpasta/infrastructure/application/wide_observation_table_file_format.py

@unique
class WideObservationTableFileFormat(str, DependencyCheckMixin, Enum):
    """
    Define the supported container file formats.

    Every member's value is identical to its name.

    """

    # Delimited text formats.
    TSV = "TSV"
    CSV = "CSV"
    # Spreadsheet formats.
    ODS = "ODS"
    XLSX = "XLSX"
    # Binary formats; note that these member names are lower case.
    arrow = "arrow"
    parquet = "parquet"
    # Biological Observation Matrix.
    BIOM = "BIOM"

Attributes¶

BIOM = 'BIOM' class-attribute ¶

CSV = 'CSV' class-attribute ¶

ODS = 'ODS' class-attribute ¶

TSV = 'TSV' class-attribute ¶

XLSX = 'XLSX' class-attribute ¶

arrow = 'arrow' class-attribute ¶

parquet = 'parquet' class-attribute ¶

`wide_observation_table_writer` ¶

Modules¶

arrow_wide_observation_table_writer ¶

Provide an arrow writer.

Attributes¶ Classes¶

ArrowWideObservationTableWriter ¶

Define the arrow writer.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/arrow_wide_observation_table_writer.py

class ArrowWideObservationTableWriter(WideObservationTableWriter):
    """Write wide observation tables in the arrow (feather) format."""

    @classmethod
    def write(
        cls,
        matrix: DataFrame[WideObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs,
    ) -> None:
        """
        Write a wide observation table to the target in feather format.

        Args:
            matrix: The wide observation table to write.
            target: The binary buffer or file path to write to.
            taxonomy: Not used by this writer.
            **kwargs: Passed on to the data frame's `to_feather` method.

        """
        matrix.to_feather(target, **kwargs)

Functions¶

write(matrix: DataFrame[WideObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/arrow_wide_observation_table_writer.py

@classmethod
def write(
    cls,
    matrix: DataFrame[WideObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs,
) -> None:
    """
    Write a wide observation table to the target in feather format.

    Args:
        matrix: The wide observation table to write.
        target: The binary buffer or file path to write to.
        taxonomy: Not used by this writer.
        **kwargs: Passed on to the data frame's `to_feather` method.

    """
    matrix.to_feather(target, **kwargs)

biom_wide_observation_table_writer ¶

Provide a Biological Observation Matrix (BIOM) writer.

Attributes¶ Classes¶

BIOMWideObservationTableWriter ¶

Define the Biological Observation Matrix (BIOM) writer.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/biom_wide_observation_table_writer.py

class BIOMWideObservationTableWriter(WideObservationTableWriter):
    """Write wide observation tables as Biological Observation Matrix (BIOM)."""

    @classmethod
    def write(
        cls,
        matrix: DataFrame[WideObservationTable],
        target: Filepath,
        taxonomy: Optional[Taxonomy] = None,
        generated_by: str = "taxpasta",
        **kwargs,
    ) -> None:
        """
        Write a wide observation table to the given file path as BIOM (HDF5).

        Args:
            matrix: The wide observation table. Its first column supplies the
                observation identifiers; the remaining columns are the samples.
            target: The file path to write to.
            taxonomy: Not used by this writer.
            generated_by: Passed on to the BIOM table's `to_hdf5` method.
            **kwargs: Accepted but not used by this writer.

        """
        counts = matrix.iloc[:, 1:].values
        # Observation identifiers are converted to strings before use.
        observation_ids = matrix.iloc[:, 0].astype(str)
        sample_ids = matrix.columns[1:]
        result = Table(
            data=counts,
            observation_ids=observation_ids,
            sample_ids=sample_ids,
        )
        with biom_open(str(target), permission="w") as handle:
            result.to_hdf5(handle, generated_by=generated_by)

Functions¶

write(matrix: DataFrame[WideObservationTable], target: Filepath, taxonomy: Optional[Taxonomy] = None, generated_by: str = 'taxpasta', **kwargs) -> None

classmethod ¶

Write the given data to the given file.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/biom_wide_observation_table_writer.py

@classmethod
def write(
    cls,
    matrix: DataFrame[WideObservationTable],
    target: Filepath,
    taxonomy: Optional[Taxonomy] = None,
    generated_by: str = "taxpasta",
    **kwargs
) -> None:
    """
    Write the given table as a BIOM HDF5 file at the given path.

    Args:
        matrix: The wide observation table to write. Its first column supplies
            the observation identifiers (converted to strings); every remaining
            column is treated as one sample.
        target: The path of the file to write; it is converted with `str`
            before being opened.
        taxonomy: Not used by this implementation.
        generated_by: The value passed as `generated_by` when the table is
            written to HDF5.
        **kwargs: Accepted but not used by this implementation.

    """
    # Split the frame into the identifier column and the sample columns.
    observation_ids = matrix.iloc[:, 0].astype(str)
    sample_ids = matrix.columns[1:]
    counts = matrix.iloc[:, 1:].values
    table = Table(
        data=counts,
        observation_ids=observation_ids,
        sample_ids=sample_ids,
    )
    with biom_open(str(target), permission="w") as handle:
        table.to_hdf5(handle, generated_by=generated_by)

csv_wide_observation_table_writer ¶

Provide a CSV writer.

Attributes¶ Classes¶

CSVWideObservationTableWriter ¶

Define the CSV writer.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/csv_wide_observation_table_writer.py

class CSVWideObservationTableWriter(WideObservationTableWriter):
    """Define the CSV writer."""

    @classmethod
    def write(
        cls,
        matrix: DataFrame[WideObservationTable],
        target: BufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs
    ) -> None:
        """
        Serialize the wide observation table as CSV without the index.

        Args:
            matrix: The wide observation table to write.
            target: The buffer or file path that receives the output.
            taxonomy: Not used by this implementation.
            **kwargs: Additional keyword arguments forwarded unchanged to
                `matrix.to_csv`.

        """
        serialize = matrix.to_csv
        serialize(target, index=False, **kwargs)

Functions¶

write(matrix: DataFrame[WideObservationTable], target: BufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/csv_wide_observation_table_writer.py

@classmethod
def write(
    cls,
    matrix: DataFrame[WideObservationTable],
    target: BufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs
) -> None:
    """
    Serialize the wide observation table as CSV without the index.

    Args:
        matrix: The wide observation table to write.
        target: The buffer or file path that receives the output.
        taxonomy: Not used by this implementation.
        **kwargs: Additional keyword arguments forwarded unchanged to
            `matrix.to_csv`.

    """
    serialize = matrix.to_csv
    serialize(target, index=False, **kwargs)

ods_wide_observation_table_writer ¶

Provide an ODS writer.

Attributes¶ Classes¶

ODSWideObservationTableWriter ¶

Define the ODS writer.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/ods_wide_observation_table_writer.py

class ODSWideObservationTableWriter(WideObservationTableWriter):
    """Define the ODS writer."""

    @classmethod
    def write(
        cls,
        matrix: DataFrame[WideObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs
    ) -> None:
        """
        Serialize the wide observation table as an ODS spreadsheet.

        The table is written without the index using the `odf` engine.

        Args:
            matrix: The wide observation table to write.
            target: The binary buffer or file path that receives the output.
            taxonomy: Not used by this implementation.
            **kwargs: Additional keyword arguments forwarded unchanged to
                `matrix.to_excel`.

        """
        serialize = matrix.to_excel
        serialize(target, index=False, engine="odf", **kwargs)

Functions¶

write(matrix: DataFrame[WideObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/ods_wide_observation_table_writer.py

@classmethod
def write(
    cls,
    matrix: DataFrame[WideObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs
) -> None:
    """
    Serialize the wide observation table as an ODS spreadsheet.

    The table is written without the index using the `odf` engine.

    Args:
        matrix: The wide observation table to write.
        target: The binary buffer or file path that receives the output.
        taxonomy: Not used by this implementation.
        **kwargs: Additional keyword arguments forwarded unchanged to
            `matrix.to_excel`.

    """
    serialize = matrix.to_excel
    serialize(target, index=False, engine="odf", **kwargs)

parquet_wide_observation_table_writer ¶

Provide a Parquet writer.

Attributes¶ Classes¶

ParquetWideObservationTableWriter ¶

Define the parquet writer.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/parquet_wide_observation_table_writer.py

class ParquetWideObservationTableWriter(WideObservationTableWriter):
    """Define the Parquet writer."""

    @classmethod
    def write(
        cls,
        matrix: DataFrame[WideObservationTable],
        target: BinaryBufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs
    ) -> None:
        """
        Serialize the wide observation table in the Parquet format.

        Args:
            matrix: The wide observation table to write.
            target: The binary buffer or file path that receives the output.
            taxonomy: Not used by this implementation.
            **kwargs: Additional keyword arguments forwarded unchanged to
                `matrix.to_parquet`.

        """
        serialize = matrix.to_parquet
        serialize(target, **kwargs)

Functions¶

write(matrix: DataFrame[WideObservationTable], target: BinaryBufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/parquet_wide_observation_table_writer.py

@classmethod
def write(
    cls,
    matrix: DataFrame[WideObservationTable],
    target: BinaryBufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs
) -> None:
    """
    Serialize the wide observation table in the Parquet format.

    Args:
        matrix: The wide observation table to write.
        target: The binary buffer or file path that receives the output.
        taxonomy: Not used by this implementation.
        **kwargs: Additional keyword arguments forwarded unchanged to
            `matrix.to_parquet`.

    """
    serialize = matrix.to_parquet
    serialize(target, **kwargs)

tsv_wide_observation_table_writer ¶

Provide a TSV writer.

Attributes¶ Classes¶

TSVWideObservationTableWriter ¶

Define the TSV writer.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/tsv_wide_observation_table_writer.py

class TSVWideObservationTableWriter(WideObservationTableWriter):
    """Define the TSV writer."""

    @classmethod
    def write(
        cls,
        matrix: DataFrame[WideObservationTable],
        target: BufferOrFilepath,
        taxonomy: Optional[Taxonomy] = None,
        **kwargs
    ) -> None:
        """
        Serialize the wide observation table as tab-separated values.

        The table is written without the index.

        Args:
            matrix: The wide observation table to write.
            target: The buffer or file path that receives the output.
            taxonomy: Not used by this implementation.
            **kwargs: Additional keyword arguments forwarded unchanged to
                `matrix.to_csv`.

        """
        serialize = matrix.to_csv
        serialize(target, sep="\t", index=False, **kwargs)

Functions¶

write(matrix: DataFrame[WideObservationTable], target: BufferOrFilepath, taxonomy: Optional[Taxonomy] = None, **kwargs) -> None

classmethod ¶

Write the given table to the given buffer or file.

Source code in taxpasta/infrastructure/application/wide_observation_table_writer/tsv_wide_observation_table_writer.py

@classmethod
def write(
    cls,
    matrix: DataFrame[WideObservationTable],
    target: BufferOrFilepath,
    taxonomy: Optional[Taxonomy] = None,
    **kwargs
) -> None:
    """
    Serialize the wide observation table as tab-separated values.

    The table is written without the index.

    Args:
        matrix: The wide observation table to write.
        target: The buffer or file path that receives the output.
        taxonomy: Not used by this implementation.
        **kwargs: Additional keyword arguments forwarded unchanged to
            `matrix.to_csv`.

    """
    serialize = matrix.to_csv
    serialize(target, sep="\t", index=False, **kwargs)

xlsx_wide_observation_table_writer ¶

Provide an XLSX writer.

Attributes¶ Classes¶

XLSXWideObservationTableWriter ¶