rule verifybamid:
    input:
        bam = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "bam" / "{sample}.bam",
        bam_index = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "bam" / "{sample}.bam.bai",
        ref_genome = config['ref'],
        svd = expand(f"{config['verifyBamID']['svd_dat']}.{{ext}}",
                    ext=['bed', 'mu', 'UD'])
    output:
        ancestry = PROCESSED_DIR / "verifyBamID/{project}/{sample}.Ancestry",
        selfsm = PROCESSED_DIR / "verifyBamID/{project}/{sample}.selfSM",
    log:
        LOGS_PATH / "{project}/verifyBamID-{sample}.log"
    message:
        "Running VerifyBamID to detect within-species contamination. Project: {wildcards.project}, sample: {wildcards.sample}"
    conda:
        str(WORKFLOW_PATH / "configs/env/verifyBamID.yaml")
    params:
        svd_prefix = lambda wildcards, input: input['svd'][0].replace(Path(input['svd'][0]).suffix, ''),
        out_prefix = lambda wildcards, output: output['ancestry'].replace('.Ancestry', ''),
    shell:
        r"""
        verifybamid2 \
            --SVDPrefix {params.svd_prefix} \
            --Reference {input.ref_genome} \
            --BamFile {input.bam} \
            --Output {params.out_prefix} \
            > {log} 2>&1
        """


rule haplocheck:
    input:
        bam = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "bam" / "{sample}.bam",
        bam_index = PROJECTS_PATH / "{project}" / "analysis" / "{sample}" / "bam" / "{sample}.bam.bai",
        mt_ref = config['haplocheck']['mt_ref'],
        mutserve = config['haplocheck']['mutserve_tool'],
        haplocheck = config['haplocheck']['haplocheck_tool'],
    output:
        vcf = PROCESSED_DIR / "haplocheck/{project}/{sample}/MT.vcf",
        result = PROCESSED_DIR / "haplocheck/{project}/{sample}/haplocheck.txt",
    log:
        LOGS_PATH / "{project}/haplocheck-{sample}.log"
    message:
        "Running haplocheck to detect within-species contamination. Project: {wildcards.project}, sample: {wildcards.sample}"
    shell:
        r"""
        # call MT variants from bam using mutserve
        {input.mutserve} call \
            --reference {input.mt_ref} \
            --contig-name chrM \
            --output {output.vcf} \
            {input.bam}
            > {log} 2>&1

        # run haplocheck
        {input.haplocheck} \
            --raw \
            --out {output.result} \
            {output.vcf}
            >> {log} 2>&1
        """