Skip to content
Snippets Groups Projects
Commit 729c88d7 authored by Manavalan Gajapathy's avatar Manavalan Gajapathy
Browse files

refactors ped creator to include sex

parent 282365d1
No related branches found
No related tags found
No related merge requests found
"""
Create dummy ped file by project
Usage:
ml reset
ml Anaconda3
conda activate quac_common
python src/create_dummy_ped.py
"""
from pathlib import Path
import pandas as pd
def main(project_name, outpath):
def read_project_tracker(project_tracker_f):
project_path = Path("/data/project/worthey_lab/projects") / project_name / "analysis"
df = pd.read_excel(project_tracker_f, usecols=["CGDS ID", "Sex"])
df["sex_ped"] = df["Sex"].str.lower().map({"m": "1", "f": "2"})
df["sex_ped"] = df["sex_ped"].fillna("-9") # unknown sex
sample_sex_dict = df.set_index("CGDS ID")["sex_ped"].to_dict()
return sample_sex_dict
def nbbbb():
project_path = Path("/data/project/worthey_lab/projects") / project_name / "analysis"
samples = (
f.name for f in project_path.iterdir() if f.is_dir() and f.name.startswith(("LW", "UDN"))
)
......@@ -24,14 +42,49 @@ def main(project_name, outpath):
return None
def main(outpath):
project_dict = {
"CF_CFF_PFarrell": {
"tracking_sheet": "data/external/project_tracker/PROJECT TRACKING -CF.xlsx",
"affected": "all",
},
# "CF_TLOAF_PFarrell",
# "EDS3_unkn_DGreenspan",
# "MuscDyst_SU_MAlexander",
# "UDN_Phase1_EAWorthey",
}
for project_name in project_dict:
# get sample's sex info from project tracking sheet
sample_sex_dict = read_project_tracker(project_dict[project_name]["tracking_sheet"])
# get samples from cheaha for the project
project_path = Path("/data/project/worthey_lab/projects") / project_name / "analysis"
samples = (
f.name
for f in project_path.iterdir()
if f.is_dir() and f.name.startswith(("LW", "UDN"))
)
header = ["#family_id", "sample_id", "paternal_id", "maternal_id", "sex", "phenotype"]
with open(Path(outpath) / f"{project_name}.ped", "w") as out_handle:
out_handle.write("\t".join(header) + "\n")
for sample in sorted(samples):
data = [
"unknown",
sample,
"-9", # father
"-9", # mother
sample_sex_dict[sample], # sample sex
"1" if project_dict[project_name]["affected"] == "all" else "-9", # affected
]
out_handle.write("\t".join(data) + "\n")
return None
if __name__ == "__main__":
OUT_PATH = "data/raw/ped" # not so raw, is it?
# PROJECT_NAME = "MuscDyst_SU_MAlexander"
for PROJECT_NAME in [
"CF_TLOAF_PFarrell",
"CF_CFF_PFarrell",
"EDS3_unkn_DGreenspan",
"MuscDyst_SU_MAlexander",
"UDN_Phase1_EAWorthey",
]:
main(PROJECT_NAME, OUT_PATH)
main(OUT_PATH)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment