Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
John David Osborne
umlsIndex
Commits
f302ba5a
Commit
f302ba5a
authored
Sep 09, 2016
by
John Osborne
Browse files
Major fixes; returns only hits found in all words for now. Logging changes.
parent
fdef3502
Changes
6
Hide whitespace changes
Inline
Side-by-side
.classpath
View file @
f302ba5a
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry
kind=
"src"
path=
"src/test/java"
/>
<classpathentry
kind=
"src"
path=
"src/main/java"
/>
<classpathentry
kind=
"src"
output=
"target/test-classes"
path=
"src/test/java"
>
<attributes>
<attribute
name=
"optional"
value=
"true"
/>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"src"
output=
"target/classes"
path=
"src/main/java"
>
<attributes>
<attribute
name=
"optional"
value=
"true"
/>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"con"
path=
"org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"
>
<attributes>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
...
...
@@ -12,5 +22,10 @@
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
excluding=
"**"
kind=
"src"
output=
"target/classes"
path=
"src/main/resources"
>
<attributes>
<attribute
name=
"maven.pomderived"
value=
"true"
/>
</attributes>
</classpathentry>
<classpathentry
kind=
"output"
path=
"target/classes"
/>
</classpath>
bin/searchClient.bsh
0 → 100755
View file @
f302ba5a
#!/bin/bash
# Runs edu.uab.ccts.nlp.umlsIndex.SearchClient from the build output under target/.
# Usage: bin/searchClient.bsh [query]
#   query  text to search for; defaults to "multiple ulcers" when omitted.
# All paths are relative, so invoke this from the project root.
java -cp target/test-classes:target/classes:target/umlsIndex-0.0.2-SNAPSHOT-jar-with-dependencies.jar \
  -Dlog4j.configuration=log4j.properties \
  edu.uab.ccts.nlp.umlsIndex.SearchClient "${1:-multiple ulcers}"
src/main/java/edu/uab/ccts/nlp/umlsIndex/SearchClient.java
0 → 100644
View file @
f302ba5a
package
edu.uab.ccts.nlp.umlsIndex
;
import
java.io.File
;
import
java.io.IOException
;
import
java.nio.file.Paths
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.index.DirectoryReader
;
import
org.apache.lucene.queryparser.classic.ParseException
;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.apache.lucene.search.IndexSearcher
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.search.ScoreDoc
;
import
org.apache.lucene.search.TopDocs
;
import
org.apache.lucene.store.FSDirectory
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
import
com.google.common.collect.HashMultiset
;
/**
 * Two-stage lookup over a pair of Lucene indexes.
 *
 * <p>A query is first run against the word-to-term index (default field
 * {@code "word"}). Each matching document carries a space-separated
 * {@code "conceptText"} field; every token of that field is then looked up in
 * the term-to-concept index (default field {@code "concept"}) and the
 * {@code "cui"} of the best match is collected. The final result keeps only
 * the CUIs that were collected for every word hit.
 *
 * <p>The index readers opened by the constructor are never closed by this
 * class; they stay open for the lifetime of the instance.
 */
public class SearchClient {

    private static final Logger LOG = LoggerFactory.getLogger(SearchClient.class);

    private IndexSearcher word2termSearcher = null;
    private IndexSearcher term2conceptSearcher = null;
    private QueryParser wordParser = null;
    private QueryParser termParser = null;

    /**
     * Command-line entry point: searches for {@code args[0]} and prints every
     * CUI shared by all word hits, one per line.
     *
     * @param args {@code args[0]} is the query text
     * @throws IllegalArgumentException if no query was supplied
     * @throws IOException if the word-to-term index directory is missing or empty
     * @throws Exception on any other index or query-parsing failure
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: SearchClient <query>");
        }

        File f = new File(Config.UMLS_WORD2TERM_INDEX_DIR);
        // File.list() yields null when f is not a directory or cannot be read,
        // so this single check covers "absent", "not a directory" and "empty".
        String[] indexFiles = f.list();
        if (indexFiles == null || indexFiles.length == 0) {
            // Fail fast with a clear message instead of continuing into the
            // constructor and dying there on an unopenable index.
            throw new IOException("Missing " + f);
        }

        SearchClient sc = new SearchClient();
        TopDocs td = sc.performSearch(sc.getWordParser(), sc.getWord2termSearcher(), args[0], 100);
        ScoreDoc[] hits = td.scoreDocs;
        LOG.info("{} has {} word hits", args[0], hits.length);

        HashMultiset<String> searchsummary = sc.doIndexSearch(hits);
        System.out.println("Got back " + searchsummary.elementSet().size() + " results from " + args[0]);
        for (String s : searchsummary.elementSet()) {
            System.out.println(s);
        }
    }

    /**
     * Opens both indexes named in {@code Config} and builds one query parser
     * per index, each backed by a {@code StandardAnalyzer}.
     *
     * @throws IOException if either index cannot be opened
     */
    public SearchClient() throws IOException {
        word2termSearcher = new IndexSearcher(
                DirectoryReader.open(FSDirectory.open(Paths.get(Config.UMLS_WORD2TERM_INDEX_DIR))));
        term2conceptSearcher = new IndexSearcher(
                DirectoryReader.open(FSDirectory.open(Paths.get(Config.UMLS_TERM2CONCEPT_INDEX_DIR))));
        wordParser = new QueryParser("word", new StandardAnalyzer());
        termParser = new QueryParser("concept", new StandardAnalyzer());
    }

    /**
     * Resolves word-index hits to CUIs and intersects them across all hits.
     *
     * <p>The first hit seeds the result; every later hit removes the CUIs it
     * does not share. Once the intersection is empty it stays empty.
     *
     * @param hits hits from a search against the word-to-term index
     * @return the CUIs associated with every hit (empty when {@code hits} is
     *         empty or the hits have no CUI in common)
     * @throws IOException if a stored document cannot be read
     * @throws ParseException if a concept token cannot be parsed as a query
     */
    public HashMultiset<String> doIndexSearch(ScoreDoc[] hits) throws IOException, ParseException {
        HashMultiset<String> searchsummary = HashMultiset.create();
        // Tracks whether the result has been seeded. Testing for an empty
        // result instead would refill an intersection that legitimately became
        // empty, admitting CUIs that are not shared by all hits.
        boolean seeded = false;

        for (ScoreDoc hit : hits) {
            Document hitDoc = word2termSearcher.doc(hit.doc);
            String theword = hitDoc.get("word");
            String allconceptText = hitDoc.get("conceptText");
            // A document without the field contributes no concepts at all.
            String[] allcons = (allconceptText == null) ? new String[0] : allconceptText.split(" ");

            if (LOG.isDebugEnabled()) {
                LOG.debug(theword + " id:" + hit.doc + " with score:" + hit.score
                        + " is associated with " + allcons.length + " concepts, see::" + allconceptText);
            }

            HashMultiset<String> hitsummary = collectCuis(allcons);
            if (!seeded) {
                searchsummary.addAll(hitsummary);
                seeded = true;
            } else {
                searchsummary.retainAll(hitsummary);
            }
        }
        return searchsummary;
    }

    /** Looks up each concept token in the term-to-concept index and gathers the CUI of its best match. */
    private HashMultiset<String> collectCuis(String[] allcons) throws IOException, ParseException {
        HashMultiset<String> hitsummary = HashMultiset.create();
        for (String con : allcons) {
            if (con.isEmpty()) {
                // Consecutive spaces in conceptText produce empty tokens; there is nothing to look up.
                continue;
            }
            ScoreDoc[] conhits = performSearch(termParser, term2conceptSearcher, con, 1).scoreDocs;
            if (conhits.length == 0) {
                // No entry for this token: skip it rather than indexing into an empty array.
                LOG.debug("No concept found for {}", con);
                continue;
            }
            Document conDoc = term2conceptSearcher.doc(conhits[0].doc);
            String cui = conDoc.get("cui");
            if (LOG.isDebugEnabled()) {
                LOG.debug(conDoc.get("concept") + " with concept score:" + conhits[0].score
                        + " and cui: " + cui);
            }
            hitsummary.add(cui);
        }
        return hitsummary;
    }

    /**
     * Parses {@code queryString} with {@code qp} and runs it on {@code is}.
     *
     * @param qp parser used to turn the text into a query
     * @param is searcher the query is executed against
     * @param queryString query text
     * @param n maximum number of hits to return
     * @return the top {@code n} hits
     * @throws IOException if the index cannot be read
     * @throws ParseException if {@code queryString} is not a valid query
     */
    public TopDocs performSearch(QueryParser qp, IndexSearcher is, String queryString, int n)
            throws IOException, ParseException {
        Query query = qp.parse(queryString);
        return is.search(query, n);
    }

    /**
     * Fetches a stored document from the word-to-term index.
     *
     * @param docId Lucene document id
     * @return the stored document
     * @throws IOException if the index cannot be read
     */
    public Document getDocument(int docId) throws IOException {
        return word2termSearcher.doc(docId);
    }

    /** @return the searcher over the word-to-term index */
    public IndexSearcher getWord2termSearcher() {
        return word2termSearcher;
    }

    /** @param word2termSearcher replacement searcher for the word-to-term index */
    public void setWord2termSearcher(IndexSearcher word2termSearcher) {
        this.word2termSearcher = word2termSearcher;
    }

    /** @return the parser used for queries against the word-to-term index */
    public QueryParser getWordParser() {
        return wordParser;
    }

    /** @param wordParser replacement parser for word-to-term queries */
    public void setWordParser(QueryParser wordParser) {
        this.wordParser = wordParser;
    }
}
src/main/resources/log4j.properties
deleted
100644 → 0
View file @
fdef3502
# Set root logger level to INFO and its only appender to A1.
log4j.rootLogger
=
INFO, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1
=
org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout
=
org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern
=
%-4r [%t] %-5p %c %x - %m%n
src/test/java/edu/uab/ccts/nlp/umlsIndex/test/integration/LuceneIndexIT.java
View file @
f302ba5a
...
...
@@ -3,34 +3,26 @@ package edu.uab.ccts.nlp.umlsIndex.test.integration;
import
java.io.IOException
;
import
java.io.File
;
import
java.nio.file.Paths
;
import
org.apache.lucene.analysis.standard.StandardAnalyzer
;
import
org.apache.lucene.document.Document
;
import
org.apache.lucene.index.DirectoryReader
;
import
org.apache.lucene.queryparser.classic.ParseException
;
import
org.apache.lucene.queryparser.classic.QueryParser
;
import
org.apache.lucene.search.IndexSearcher
;
import
org.apache.lucene.search.Query
;
import
org.apache.lucene.search.ScoreDoc
;
import
org.apache.lucene.search.TopDocs
;
import
org.apache.lucene.store.FSDirectory
;
import
com.google.common.collect.HashMultiset
;
import
edu.uab.ccts.nlp.umlsIndex.Config
;
import
edu.uab.ccts.nlp.umlsIndex.SearchClient
;
import
org.junit.Assume
;
import
org.slf4j.Logger
;
import
org.slf4j.LoggerFactory
;
/**
* Integration test that searches the UMLS Lucene indexes through SearchClient.
*/
public
class
LuceneIndexIT
{
private
IndexSearcher
word2termSearcher
=
null
;
private
IndexSearcher
term2conceptSearcher
=
null
;
private
QueryParser
wordParser
=
null
;
private
QueryParser
termParser
=
null
;
SearchClient
searchClient
=
null
;
private
static
final
Logger
LOG
=
LoggerFactory
.
getLogger
(
LuceneIndexIT
.
class
)
;
/**
* Create the test case
*
...
...
@@ -38,10 +30,7 @@ public class LuceneIndexIT
*/
public
LuceneIndexIT
()
throws
IOException
{
word2termSearcher
=
new
IndexSearcher
(
DirectoryReader
.
open
(
FSDirectory
.
open
(
Paths
.
get
(
Config
.
UMLS_WORD2TERM_INDEX_DIR
))));
term2conceptSearcher
=
new
IndexSearcher
(
DirectoryReader
.
open
(
FSDirectory
.
open
(
Paths
.
get
(
Config
.
UMLS_TERM2CONCEPT_INDEX_DIR
))));
wordParser
=
new
QueryParser
(
"word"
,
new
StandardAnalyzer
());
termParser
=
new
QueryParser
(
"concept"
,
new
StandardAnalyzer
());
searchClient
=
new
SearchClient
();
}
...
...
@@ -53,43 +42,16 @@ public class LuceneIndexIT
File
f
=
new
File
(
Config
.
UMLS_WORD2TERM_INDEX_DIR
);
org
.
junit
.
Assume
.
assumeTrue
(!(
f
.
isDirectory
()
&&
f
.
list
().
length
>
0
));
TopDocs
td
=
performSearch
(
wordParser
,
word2termSearcher
,
"multiple ulcers"
,
100
);
ScoreDoc
[]
hits
=
td
.
scoreDocs
;
System
.
out
.
println
(
"Number of hits: "
+
hits
.
length
);
HashMultiset
<
String
>
searchsummary
=
HashMultiset
.
create
();
for
(
int
i
=
0
;
i
<
hits
.
length
;
i
++)
{
Document
hitDoc
=
word2termSearcher
.
doc
(
hits
[
i
].
doc
);
String
theword
=
hitDoc
.
get
(
"word"
);
String
allconcept_text
=
hitDoc
.
get
(
"conceptText"
);
String
[]
allcons
=
allconcept_text
.
split
(
" "
);
System
.
out
.
println
(
theword
+
" id:"
+
hits
[
i
].
doc
+
" with score:"
+
hits
[
i
].
score
+
" is associated with "
+
allcons
.
length
+
" concepts, see::"
+
allconcept_text
);
TopDocs
td
=
searchClient
.
performSearch
(
searchClient
.
getWordParser
(),
searchClient
.
getWord2termSearcher
(),
"multiple ulcers"
,
100
);
HashMultiset
<
String
>
hitsummary
=
HashMultiset
.
create
();
for
(
int
j
=
0
;
j
<
allcons
.
length
;
j
++)
{
TopDocs
topcons
=
performSearch
(
termParser
,
term2conceptSearcher
,
allcons
[
j
],
1
);
ScoreDoc
[]
conhits
=
topcons
.
scoreDocs
;
Document
conDoc
=
term2conceptSearcher
.
doc
(
conhits
[
0
].
doc
);
String
cui
=
conDoc
.
get
(
"cui"
);
System
.
out
.
println
(
conDoc
.
get
(
"concept"
)+
" with concept score:"
+
conhits
[
0
].
score
+
" and cui: "
+
cui
);
hitsummary
.
add
(
cui
);
}
searchsummary
.
addAll
(
hitsummary
);
}
assert
(
searchsummary
.
contains
(
"C1265815"
));
ScoreDoc
[]
hits
=
td
.
scoreDocs
;
LOG
.
info
(
"Number of hits: "
+
hits
.
length
);
HashMultiset
<
String
>
searchsummary
=
searchClient
.
doIndexSearch
(
hits
);
assert
(
searchsummary
.
contains
(
"C1265815"
));
}
public
TopDocs
performSearch
(
QueryParser
qp
,
IndexSearcher
is
,
String
queryString
,
int
n
)
throws
IOException
,
ParseException
{
Query
query
=
qp
.
parse
(
queryString
);
return
is
.
search
(
query
,
n
);
}
public
Document
getDocument
(
int
docId
)
throws
IOException
{
return
word2termSearcher
.
doc
(
docId
);
}
}
src/test/resources/log4j.properties
0 → 100644
View file @
f302ba5a
# Root logger: INFO and above, sent only to the "file" appender.
# The commented-out lines are alternative DEBUG / console settings.
#log4j.rootLogger=DEBUG
#log4j.rootLogger=DEBUG, file
log4j.rootLogger
=
INFO, file
#log4j.appender.stdout=org.apache.log4j.ConsoleAppender
# "file" appender: rolling log file at log/umlsIndex.log (relative path).
log4j.appender.file
=
org.apache.log4j.RollingFileAppender
log4j.appender.file.File
=
log/umlsIndex.log
# Roll over at 100MB, keeping up to 10 backup files.
log4j.appender.file.MaxFileSize
=
100MB
log4j.appender.file.MaxBackupIndex
=
10
#log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Line format: timestamp, level, short logger name, source line, message.
log4j.appender.file.layout
=
org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern
=
%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment