Created
August 18, 2017 19:22
-
-
Save ritsz/61fc7e75fa93abac01deff481889310d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ritsz@ritsz-Lenovo:~/.lucene-6.6.0$ echo $CLASSPATH | |
| /home/ritsz/.lucene-6.6.0/core/lucene-core-6.6.0.jar:/home/ritsz/.lucene-6.6.0/demo/lucene-demo-6.6.0.jar:/home/ritsz/.lucene-6.6.0/queryparser/lucene-queryparser-6.6.0.jar:/home/ritsz/.lucene-6.6.0/analysis/common/lucene-analyzers-common-6.6.0.jar | |
| sudo java -cp $CLASSPATH org.apache.lucene.demo.IndexFiles -docs <DOCUMENT DIRECTORY TO SCAN> -index <INDEX PATH> | |
| java org.apache.lucene.demo.SearchFiles | |
| #!/usr/bin/python | |
| # | |
| # extract.py -- Prints the value of a given field for every document in a Lucene index | |
| # Tomer Gabel, Delver, 2008 | |
| # | |
| # Usage: extract.py <field_name> <index_url> | |
| # | |
| import sys | |
| import string | |
| import lucene | |
| from lucene import IndexReader, StandardAnalyzer, FSDirectory | |
def usage():
    """Print the command-line usage text to stdout and exit with status -1.

    Raises:
        SystemExit: always, with exit code -1.
    """
    # A single parenthesised argument makes print(...) valid on both
    # Python 2 (print statement) and Python 3 (print function), with
    # byte-identical output: "Usage:", a blank line, then the synopsis.
    print("Usage:\n")
    print(sys.argv[0] + " <field_name> <index_url>")
    sys.exit(-1)
def main():
    """Print one field's value for every non-deleted document in an index.

    The field name is read from ``sys.argv[1]`` and the index location from
    ``sys.argv[2]``. When fewer than two arguments are supplied, ``usage()``
    is called, which prints the usage text and exits.
    """
    if len(sys.argv) < 3:
        usage()

    lucene.initVM(lucene.CLASSPATH)

    field_name = sys.argv[1]
    index_location = sys.argv[2]

    # NOTE(review): these IndexReader/FSDirectory calls look like the old
    # Lucene 2.x-era PyLucene API; confirm they exist in the installed
    # PyLucene. The False argument is presumably "do not create" -- verify.
    reader = IndexReader.open(FSDirectory.getDirectory(index_location, False))
    try:
        for doc_id in range(reader.maxDoc()):
            # Document ids flagged as deleted are skipped.
            if reader.isDeleted(doc_id):
                continue
            # Single-argument print(...) is valid on Python 2 and Python 3.
            print(reader.document(doc_id).get(field_name))
    finally:
        # Always release the reader, even if reading a document fails.
        reader.close()
# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment