NLP_Project_Code.ipynb 5.5 KB
Newer Older
Zaid A Ali's avatar
Zaid A Ali committed
1
2
3
4
5
6
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    "For this project, our goal is to create an NLP model that automatically assigns ICD-9 codes, given the clinical notes from each encounter."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All modules imported successfully\n"
     ]
    }
   ],
   "source": [
    "#imports\n",
    "import pandas as pd\n",
    "print(\"All modules imported successfully\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       ROW_ID_x  SUBJECT_ID  HADM_ID  SEQ_NUM ICD9_CODE  ROW_ID_y   CHARTDATE  \\\n",
      "0          1297         109   172335      1.0     40301     14797  2141-09-24   \n",
      "1          1297         109   172335      1.0     40301     72706  2141-09-21   \n",
      "2          1297         109   172335      1.0     40301    170207  2141-09-18   \n",
      "3          1297         109   172335      1.0     40301    341513  2141-09-21   \n",
      "4          1297         109   172335      1.0     40301    341514  2141-09-21   \n",
      "...         ...         ...      ...      ...       ...       ...         ...   \n",
      "39995       801         101   175533      9.0      2762     15782  2196-10-12   \n",
      "39996       801         101   175533      9.0      2762    170036  2196-09-26   \n",
      "39997       801         101   175533      9.0      2762    170037  2196-09-26   \n",
      "39998       801         101   175533      9.0      2762    170038  2196-09-26   \n",
      "39999       801         101   175533      9.0      2762    173709  2196-09-30   \n",
      "\n",
      "                 CHARTTIME            STORETIME           CATEGORY  \\\n",
      "0                      NaN                  NaN  Discharge summary   \n",
      "1                      NaN                  NaN               Echo   \n",
      "2                      NaN                  NaN                ECG   \n",
      "3      2141-09-21 02:49:00  2141-09-21 02:49:45         Physician    \n",
      "4      2141-09-21 02:49:00  2141-09-21 02:57:11         Physician    \n",
      "...                    ...                  ...                ...   \n",
      "39995                  NaN                  NaN  Discharge summary   \n",
      "39996                  NaN                  NaN                ECG   \n",
      "39997                  NaN                  NaN                ECG   \n",
      "39998                  NaN                  NaN                ECG   \n",
      "39999                  NaN                  NaN                ECG   \n",
      "\n",
      "                             DESCRIPTION     CGID  ISERROR  \\\n",
      "0                                 Report      NaN      NaN   \n",
      "1                                 Report      NaN      NaN   \n",
      "2                                 Report      NaN      NaN   \n",
      "3      Physician Resident Admission Note  17650.0      NaN   \n",
      "4      Physician Resident Admission Note  17650.0      NaN   \n",
      "...                                  ...      ...      ...   \n",
      "39995                             Report      NaN      NaN   \n",
      "39996                             Report      NaN      NaN   \n",
      "39997                             Report      NaN      NaN   \n",
      "39998                             Report      NaN      NaN   \n",
      "39999                             Report      NaN      NaN   \n",
      "\n",
      "                                                    TEXT  \n",
      "0      Admission Date:  [**2141-9-18**]              ...  \n",
      "1      PATIENT/TEST INFORMATION:\\nIndication: Pericar...  \n",
      "2      Sinus rhythm\\nRightward axis\\nSince previous t...  \n",
      "3      Chief Complaint:  hypotension, altered mental ...  \n",
      "4      Chief Complaint:  hypotension, altered mental ...  \n",
      "...                                                  ...  \n",
      "39995  Admission Date:  [**2196-9-26**]     Discharge...  \n",
      "39996  Baseline artifact\\nSinus rhythm\\nGeneralized l...  \n",
      "39997  Baseline artifact\\nProbable atrial flutter wit...  \n",
      "39998  Baseline artifact\\nProbable atrial flutter wit...  \n",
      "39999  Wide complex tachycardia with a right bundle-b...  \n",
      "\n",
      "[40000 rows x 14 columns]\n"
     ]
    }
   ],
   "source": [
Zaid A Ali's avatar
Zaid A Ali committed
95
96
97
    "\n",
    "\n",
    "\n",
98
    "    \n",
Zaid A Ali's avatar
Zaid A Ali committed
99
100
    "\n",
    "\n",
101
102
103
104
    "# Read both source tables from CSV files in the working directory.\n",
    "diagnoses = pd.read_csv(\"DIAGNOSES_ICD.csv\")\n",
    "note_events = pd.read_csv(\n",
    "    \"NOTEEVENTS.csv\",\n",
    "    engine=\"python\",\n",
    "    on_bad_lines='skip',  # rows the parser cannot read are dropped, not raised\n",
    ")\n",
    "\n",
    "# Join the two tables on their shared HADM_ID and SUBJECT_ID columns,\n",
    "# then keep only the first 40000 rows of the merged result.\n",
    "full_dataset = diagnoses.merge(\n",
    "    note_events,\n",
    "    on=[\"HADM_ID\", \"SUBJECT_ID\"],\n",
    ").iloc[:40000]\n",
Zaid A Ali's avatar
Zaid A Ali committed
105
    "\n",
106
    "print(full_dataset)\n"
Zaid A Ali's avatar
Zaid A Ali committed
107
108
109
110
111
112
113
114
115
116
117
118
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
119
   "display_name": "Python [conda env:nlp2021]",
Zaid A Ali's avatar
Zaid A Ali committed
120
   "language": "python",
121
   "name": "conda-env-nlp2021-py"
Zaid A Ali's avatar
Zaid A Ali committed
122
123
124
125
126
127
128
129
130
131
132
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
133
   "version": "3.8.10"
Zaid A Ali's avatar
Zaid A Ali committed
134
135
136
  }
 },
 "nbformat": 4,
137
 "nbformat_minor": 4
Zaid A Ali's avatar
Zaid A Ali committed
138
}