Lucene Index Source Code
DocValuesWriter
BinaryDocValuesWriter
SortedDocValuesWriter
SortedSetDocValuesWriter
NormValuesWriter
NumericDocValuesWriter
PointValuesWriter
ByteSliceWriter
DocumentsWriter
FreqProxTermsWriter
IndexWriter
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException {
  // Check whether the directory contains files that are pending deletion; if so, try to remove them,
  // and if any cannot be deleted, throw an exception and abort initialization.
  if (d instanceof FSDirectory && ((FSDirectory) d).checkPendingDeletions()) {
    throw new IllegalArgumentException("Directory " + d + " still has pending deleted files; cannot initialize IndexWriter");
  }

  conf.setIndexWriter(this); // prevent reuse by other instances
  config = conf;
  infoStream = config.getInfoStream();

  // obtain the write.lock. If the user configured a timeout,
  // we wrap with a sleeper and this might take some time.
  writeLock = d.obtainLock(WRITE_LOCK_NAME);

  boolean success = false;
  try {
    directoryOrig = d;
    directory = new LockValidatingDirectoryWrapper(d, writeLock);

    // Directory we use for merging, so we can abort running merges, and so
    // merge schedulers can optionally rate-limit per-merge IO:
    mergeDirectory = addMergeRateLimiters(directory);

    analyzer = config.getAnalyzer();
    mergeScheduler = config.getMergeScheduler();
    mergeScheduler.setInfoStream(infoStream);
    codec = config.getCodec();

    bufferedUpdatesStream = new BufferedUpdatesStream(infoStream);
    poolReaders = config.getReaderPooling();

    OpenMode mode = config.getOpenMode();
    boolean create;
    if (mode == OpenMode.CREATE) {
      create = true;
    } else if (mode == OpenMode.APPEND) {
      create = false;
    } else {
      // CREATE_OR_APPEND - create only if an index does not exist
      create = !DirectoryReader.indexExists(directory);
    }

    // If index is too old, reading the segments will throw
    // IndexFormatTooOldException.

    boolean initialIndexExists = true;

    String[] files = directory.listAll();

    // Set up our initial SegmentInfos:
    IndexCommit commit = config.getIndexCommit();

    // Set up our initial SegmentInfos:
    StandardDirectoryReader reader;
    if (commit == null) {
      reader = null;
    } else {
      reader = commit.getReader();
    }

    if (create) {

      if (config.getIndexCommit() != null) {
        // We cannot both open from a commit point and create:
        if (mode == OpenMode.CREATE) {
          throw new IllegalArgumentException("cannot use IndexWriterConfig.setIndexCommit() with OpenMode.CREATE");
        } else {
          throw new IllegalArgumentException("cannot use IndexWriterConfig.setIndexCommit() when index has no commit");
        }
      }

      // Try to read first. This is to allow create
      // against an index that's currently open for
      // searching. In this case we write the next
      // segments_N file with no segments:
      SegmentInfos sis = null;
      try {
        sis = SegmentInfos.readLatestCommit(directory);
        sis.clear();
      } catch (IOException e) {
        // Likely this means it's a fresh directory
        initialIndexExists = false;
        sis = new SegmentInfos();
      }

      segmentInfos = sis;

      rollbackSegments = segmentInfos.createBackupSegmentInfos();

      // Record that we have a change (zero out all
      // segments) pending:
      changed();

    } else if (reader != null) {
      // Init from an existing already opened NRT or non-NRT reader:

      if (reader.directory() != commit.getDirectory()) {
        throw new IllegalArgumentException("IndexCommit's reader must have the same directory as the IndexCommit");
      }

      if (reader.directory() != directoryOrig) {
        throw new IllegalArgumentException("IndexCommit's reader must have the same directory passed to IndexWriter");
      }

      if (reader.segmentInfos.getLastGeneration() == 0) {
        // TODO: maybe we could allow this? It's tricky...
        throw new IllegalArgumentException("index must already have an initial commit to open from reader");
      }

      // Must clone because we don't want the incoming NRT reader to "see" any changes this writer now makes:
      segmentInfos = reader.segmentInfos.clone();

      SegmentInfos lastCommit;
      try {
        lastCommit = SegmentInfos.readCommit(directoryOrig, segmentInfos.getSegmentsFileName());
      } catch (IOException ioe) {
        throw new IllegalArgumentException("the provided reader is stale: its prior commit file \"" + segmentInfos.getSegmentsFileName() + "\" is missing from index");
      }

      if (reader.writer != null) {

        // The old writer better be closed (we have the write lock now!):
        assert reader.writer.closed;

        // In case the old writer wrote further segments (which we are now dropping),
        // update SIS metadata so we remain write-once:
        segmentInfos.updateGenerationVersionAndCounter(reader.writer.segmentInfos);
        lastCommit.updateGenerationVersionAndCounter(reader.writer.segmentInfos);
      }

      rollbackSegments = lastCommit.createBackupSegmentInfos();

      if (infoStream.isEnabled("IW")) {
        infoStream.message("IW", "init from reader " + reader);
        messageState();
      }
    } else {
      // Init from either the latest commit point, or an explicit prior commit point:

      String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(files);
      if (lastSegmentsFile == null) {
        throw new IndexNotFoundException("no segments* file found in " + directory + ": files: " + Arrays.toString(files));
      }

      // Do not use SegmentInfos.read(Directory) since the spooky
      // retrying it does is not necessary here (we hold the write lock):
      segmentInfos = SegmentInfos.readCommit(directoryOrig, lastSegmentsFile);

      if (commit != null) {
        // Swap out all segments, but, keep metadata in
        // SegmentInfos, like version & generation, to
        // preserve write-once. This is important if
        // readers are open against the future commit
        // points.
        if (commit.getDirectory() != directoryOrig) {
          throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory, expected=" + directoryOrig + ", got=" + commit.getDirectory());
        }

        SegmentInfos oldInfos = SegmentInfos.readCommit(directoryOrig, commit.getSegmentsFileName());
        segmentInfos.replace(oldInfos);
        changed();

        if (infoStream.isEnabled("IW")) {
          infoStream.message("IW", "init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos();
    }

    pendingNumDocs.set(segmentInfos.totalMaxDoc());

    // start with previous field numbers, but new FieldInfos
    // NOTE: this is correct even for an NRT reader because we'll pull FieldInfos even for the un-committed segments:
    globalFieldNumberMap = getFieldNumberMap();

    config.getFlushPolicy().init(config);
    docWriter = new DocumentsWriter(this, config, directoryOrig, directory);
    eventQueue = docWriter.eventQueue();

    // Default deleter (for backwards compatibility) is
    // KeepOnlyLastCommitDeleter:

    // Sync'd is silly here, but IFD asserts we sync'd on the IW instance:
    synchronized(this) {
      deleter = new IndexFileDeleter(files, directoryOrig, directory,
                                     config.getIndexDeletionPolicy(),
                                     segmentInfos, infoStream, this,
                                     initialIndexExists, reader != null);

      // We incRef all files when we return an NRT reader from IW, so all files must exist even in the NRT case:
      assert create || filesExist(segmentInfos);
    }

    if (deleter.startingCommitDeleted) {
      // Deletion policy deleted the "head" commit point.
      // We have to mark ourself as changed so that if we
      // are closed w/o any further changes we write a new
      // segments_N file.
      changed();
    }

    if (reader != null) {
      // Pre-enroll all segment readers into the reader pool; this is necessary so
      // any in-memory NRT live docs are correctly carried over, and so NRT readers
      // pulled from this IW share the same segment reader:
      List<LeafReaderContext> leaves = reader.leaves();
      assert segmentInfos.size() == leaves.size();
      for (int i = 0; i < leaves.size(); i++) {
        LeafReaderContext leaf = leaves.get(i);
        SegmentReader segReader = (SegmentReader) leaf.reader();
        SegmentReader newReader = new SegmentReader(segmentInfos.info(i), segReader, segReader.getLiveDocs(), segReader.numDocs());
        readerPool.readerMap.put(newReader.getSegmentInfo(), new ReadersAndUpdates(this, newReader));
      }

      // We always assume we are carrying over incoming changes when opening from reader:
      segmentInfos.changed();
      changed();
    }

    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "init: create=" + create);
      messageState();
    }

    success = true;

  } finally {
    if (!success) {
      if (infoStream.isEnabled("IW")) {
        infoStream.message("IW", "init: hit exception on init; releasing write lock");
      }
      IOUtils.closeWhileHandlingException(writeLock);
      writeLock = null;
    }
  }
}
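
To see how this constructor is reached in practice, here is a minimal usage sketch (not part of the Lucene source above): it opens an FSDirectory, builds an IndexWriterConfig, and lets new IndexWriter(dir, conf) run the initialization shown. The index path and class name are illustrative only, and it assumes lucene-core and lucene-analyzers-common are on the classpath.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexWriterOpenExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical on-disk index location, used only for this sketch.
    Directory dir = FSDirectory.open(Paths.get("/tmp/lucene-demo-index"));

    // CREATE_OR_APPEND maps to the third OpenMode branch above: the constructor calls
    // DirectoryReader.indexExists(directory) and creates a new index only if none exists.
    IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer())
        .setOpenMode(OpenMode.CREATE_OR_APPEND);

    // This call runs the constructor shown above: it obtains write.lock, loads or
    // creates the SegmentInfos, and wires up DocumentsWriter and IndexFileDeleter.
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
      writer.commit();
    }

    dir.close();
  }
}

Note that because the constructor calls conf.setIndexWriter(this) to prevent reuse, an IndexWriterConfig instance can back only one IndexWriter; a second writer needs a fresh config.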
