Commit fdef3502 authored by John David Osborne
Browse files

SNOMED CT querying; don't run the integration test (IT) unless the index is populated

parent b740c238
# umlsIndex # umlsIndex
Short code to do some simple indexing of UMLS. Short code to do some simple indexing of SNOMEDCT via UMLS.
FIXME - Should pull all synonyms, not just the most common/largest/smallest. There should be enough memory for this: order the query by CUI and write only after all synonyms have been pulled.
...@@ -231,8 +231,8 @@ Text fields are useful for keyword search. ...@@ -231,8 +231,8 @@ Text fields are useful for keyword search.
*/ */
public List<String> dropStopWords(List<String> allWords) throws URISyntaxException{ public List<String> dropStopWords(List<String> allWords) throws URISyntaxException{
List<String> stops = new ArrayList<String>(); List<String> stops = new ArrayList<String>();
//try (Stream<String> stream = Files.lines(Paths.get(getClass().getResource("/StopWords.txt").toURI()))) { try (Stream<String> stream = Files.lines(Paths.get(getClass().getResource("/StopWords.txt").toURI()))) {
try (Stream<String> stream = new BufferedReader(new InputStreamReader(ClassLoader.getSystemResourceAsStream("StopWords.txt"))).lines()){ //try (Stream<String> stream = new BufferedReader(new InputStreamReader(ClassLoader.getSystemResourceAsStream("StopWords.txt"))).lines()){
stops = stream stops = stream
.filter(line -> !line.startsWith("#")) .filter(line -> !line.startsWith("#"))
.collect(Collectors.toList()); .collect(Collectors.toList());
......
...@@ -19,8 +19,9 @@ WHERE mrconso.LAT='ENG' ...@@ -19,8 +19,9 @@ WHERE mrconso.LAT='ENG'
-- AND mrconso.ts = 'P' -- AND mrconso.ts = 'P'
-- AND mrconso.stt = 'PF' -- AND mrconso.stt = 'PF'
-- AND mrconso.ispref = 'Y' -- AND mrconso.ispref = 'Y'
AND tui IN ('T046') -- AND tui IN ('T046')
-- AND tui IN ('T046','T047') -- AND tui IN ('T046','T047')
AND mrconso.SAB LIKE 'SNOMEDCT%'
-- AND mrconso.cui='C0814136' -- AND mrconso.cui='C0814136'
GROUP BY mrconso.cui GROUP BY mrconso.cui
) thetable ) thetable
...@@ -2,6 +2,7 @@ package edu.uab.ccts.nlp.umlsIndex.test.integration; ...@@ -2,6 +2,7 @@ package edu.uab.ccts.nlp.umlsIndex.test.integration;
import java.io.IOException; import java.io.IOException;
import java.io.File;
import java.nio.file.Paths; import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
...@@ -19,6 +20,7 @@ import com.google.common.collect.HashMultiset; ...@@ -19,6 +20,7 @@ import com.google.common.collect.HashMultiset;
import edu.uab.ccts.nlp.umlsIndex.Config; import edu.uab.ccts.nlp.umlsIndex.Config;
import org.junit.Assume;
/** /**
* Unit test for simple App. * Unit test for simple App.
...@@ -48,6 +50,9 @@ public class LuceneIndexIT ...@@ -48,6 +50,9 @@ public class LuceneIndexIT
@org.junit.Test @org.junit.Test
public void testIndex() throws Exception public void testIndex() throws Exception
{ {
File f = new File(Config.UMLS_WORD2TERM_INDEX_DIR);
org.junit.Assume.assumeTrue(!(f.isDirectory() && f.list().length>0));
TopDocs td = performSearch(wordParser,word2termSearcher,"multiple ulcers", 100); TopDocs td = performSearch(wordParser,word2termSearcher,"multiple ulcers", 100);
ScoreDoc[] hits = td.scoreDocs; ScoreDoc[] hits = td.scoreDocs;
System.out.println("Number of hits: " + hits.length); System.out.println("Number of hits: " + hits.length);
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment