Lucene++ - a full-featured, c++ search engine
API Documentation


DocumentsWriter.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef DOCUMENTSWRITER_H
8 #define DOCUMENTSWRITER_H
9 
10 #include "ByteBlockPool.h"
11 #include "RAMFile.h"
12 
13 namespace Lucene {
14 
// This class accepts multiple added documents and directly writes a single segment file.
// It does this more efficiently than creating a single segment per document (with
// DocumentWriter) and doing standard merges on those segments.
//
// NOTE(review): this listing is an extracted documentation view; gaps in the leading line
// numbers mark declarations and doc comments that are not shown here.
54 class DocumentsWriter : public LuceneObject {
55 public:
57  virtual ~DocumentsWriter();
58 
60 
61 protected:
62  String docStoreSegment; // Current doc-store segment we are writing
63  int32_t docStoreOffset; // Current starting doc-store offset of current segment
64 
65  int32_t nextDocID; // Next docID to be added
66  int32_t numDocsInRAM; // # docs buffered in RAM
67 
    // Max # ThreadState instances; if there are more threads than this they share ThreadStates.
69  static const int32_t MAX_THREAD_STATE;
71  MapThreadDocumentsWriterThreadState threadBindings;
72 
73  int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush)
74  bool aborting; // True if an abort is pending
75 
77 
80 
83 
86 
    // How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead.
88  int64_t ramBufferSize;
91 
    // If we've allocated 5% over our RAM budget, we then free down to 95%.
93  int64_t freeTrigger;
94  int64_t freeLevel;
95 
    // Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead.
97  int32_t maxBufferedDocs;
98 
101 
102  bool closed;
103 
107 
110 
111 public:
    // Coarse estimates used to measure RAM usage of buffered deletes.
113  static const int32_t OBJECT_HEADER_BYTES;
114  static const int32_t POINTER_NUM_BYTE;
115  static const int32_t INT_NUM_BYTE;
116  static const int32_t CHAR_NUM_BYTE;
117 
    // Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER).
    // Entry is object with Term key, BufferedDeletes.Num val, int hash, Entry next
    // (OBJ_HEADER + 3*POINTER + INT). Term is object with String field and String text
    // (OBJ_HEADER + 2*POINTER). We don't count Term's field since it's interned. Term's text
    // is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR).
    // BufferedDeletes.num is OBJ_HEADER + INT.
123  static const int32_t BYTES_PER_DEL_TERM;
124 
    // Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER).
    // Integer is OBJ_HEADER + int.
127  static const int32_t BYTES_PER_DEL_DOCID;
128 
    // Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER).
    // Entry is object with Query key, Integer val, int hash, Entry next
    // (OBJ_HEADER + 3*POINTER + INT). Query we often undercount (say 24 bytes).
    // Integer is OBJ_HEADER + INT.
132  static const int32_t BYTES_PER_DEL_QUERY;
133 
    // Initial chunks size of the shared byte[] blocks used to store postings data.
135  static const int32_t BYTE_BLOCK_SHIFT;
136  static const int32_t BYTE_BLOCK_SIZE;
137  static const int32_t BYTE_BLOCK_MASK;
138  static const int32_t BYTE_BLOCK_NOT_MASK;
139 
    // Initial chunk size of the shared char[] blocks used to store term text.
141  static const int32_t CHAR_BLOCK_SHIFT;
142  static const int32_t CHAR_BLOCK_SIZE;
143  static const int32_t CHAR_BLOCK_MASK;
144 
145  static const int32_t MAX_TERM_LENGTH;
146 
    // Initial chunks size of the shared int[] blocks used to store postings data.
148  static const int32_t INT_BLOCK_SHIFT;
149  static const int32_t INT_BLOCK_SIZE;
150  static const int32_t INT_BLOCK_MASK;
151 
152  static const int32_t PER_DOC_BLOCK_SIZE;
153 
154 INTERNAL:
158  String segment; // Current segment we are working on
159 
160  int32_t numDocsInStore; // # docs written to doc stores
161 
162  bool flushPending; // True when a thread has decided to flush
163  bool bufferIsFull; // True when it's time to write segment
164 
166  int32_t maxFieldLength;
168 
170 
173 
176 
179 
180  int64_t numBytesAlloc;
181  int64_t numBytesUsed;
182 
183  // used only by assert
185 
186 public:
    // Called directly after instantiation to create objects that depend on this object
    // being fully constructed.
187  virtual void initialize();
188 
191 
193 
194  void updateFlushedDocCount(int32_t n);
195  int32_t getFlushedDocCount();
196  void setFlushedDocCount(int32_t n);
197 
    // Returns true if any of the fields in the current buffered docs have
    // omitTermFreqAndPositions==false.
199  bool hasProx();
200 
    // If non-null, various details of indexing are printed here.
202  void setInfoStream(const InfoStreamPtr& infoStream);
203 
204  void setMaxFieldLength(int32_t maxFieldLength);
205  void setSimilarity(const SimilarityPtr& similarity);
206 
    // Set how much RAM we can use before flushing.
208  void setRAMBufferSizeMB(double mb);
209  double getRAMBufferSizeMB();
210 
    // Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
212  void setMaxBufferedDocs(int32_t count);
213  int32_t getMaxBufferedDocs();
214 
    // Get current segment name we are writing.
216  String getSegment();
217 
    // Returns how many docs are currently buffered in RAM.
219  int32_t getNumDocsInRAM();
220 
    // Returns the current doc store segment we are writing to.
222  String getDocStoreSegment();
223 
    // Returns the doc offset into the shared doc store for the current buffered docs.
225  int32_t getDocStoreOffset();
226 
    // Closes the current open doc stores and returns the doc store segment name.
229  String closeDocStore();
230 
232 
233  void message(const String& message);
234 
238 
239  void addOpenFile(const String& name);
240  void removeOpenFile(const String& name);
241 
242  void setAborting();
243 
    // Called if we hit an exception at a bad time (when updating the index files).
246  void abort();
247 
    // Returns true if an abort is in progress.
249  bool pauseAllThreads();
250  void resumeAllThreads();
251 
252  bool anyChanges();
253 
254  void initFlushState(bool onlyDocStore);
255 
    // Flush all pending docs to a new segment.
257  int32_t flush(bool _closeDocStore);
258 
260 
    // Build compound file for the segment we just flushed.
262  void createCompoundFile(const String& segment);
263 
    // Set flushPending if it is not already set and returns whether it was set.
266  bool setFlushPending();
267  void clearFlushPending();
268 
269  void pushDeletes();
270 
271  void close();
272 
273  void initSegmentName(bool onlyDocStore);
274 
279 
    // Returns true if the caller (IndexWriter) should now flush.
281  bool addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer);
282 
283  bool updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer);
284  bool updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm);
285 
286  int32_t getNumBufferedDeleteTerms(); // for testing
287  MapTermNum getBufferedDeleteTerms(); // for testing
288 
    // Called whenever a merge has completed and the merged segments had deletions.
290  void remapDeletes(const SegmentInfosPtr& infos, Collection< Collection<int32_t> > docMaps, Collection<int32_t> delCounts, const OneMergePtr& merge, int32_t mergeDocCount);
291 
293  bool bufferDeleteTerm(const TermPtr& term);
295  bool bufferDeleteQuery(const QueryPtr& query);
296  bool deletesFull();
297  bool doApplyDeletes();
298 
299  void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms);
300  int32_t getMaxBufferedDeleteTerms();
301 
302  bool hasDeletes();
303  bool applyDeletes(const SegmentInfosPtr& infos);
304  bool doBalanceRAM();
305 
306  void waitForWaitQueue();
307 
308  int64_t getRAMUsed();
309 
310  IntArray getIntBlock(bool trackAllocations);
311  void bytesAllocated(int64_t numBytes);
312  void bytesUsed(int64_t numBytes);
313  void recycleIntBlocks(Collection<IntArray> blocks, int32_t start, int32_t end);
314 
315  CharArray getCharBlock();
316  void recycleCharBlocks(Collection<CharArray> blocks, int32_t numBlocks);
317 
318  String toMB(int64_t v);
319 
    // We have four pools of RAM, including postings, byte blocks (holds freq/prox posting
    // data) and char blocks. NOTE(review): the remainder of the original description is
    // truncated in this view; consult the header itself for the full contract.
328  void balanceRAM();
329 
330 protected:
    // Reset after a flush.
332  void doAfterFlush();
333 
334  bool allThreadsIdle();
335 
336  void waitReady(const DocumentsWriterThreadStatePtr& state);
337 
338  bool timeToFlushDeletes();
339 
340  // used only by assert
341  bool checkDeleteTerm(const TermPtr& term);
342 
343  bool applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart);
344  void addDeleteTerm(const TermPtr& term, int32_t docCount);
345 
    // Buffer a specific docID for deletion.
347  void addDeleteDocID(int32_t docID);
348  void addDeleteQuery(const QueryPtr& query, int32_t docID);
349 
    // Does the synchronized work to finish/flush the inverted document.
351  void finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter);
352 
353  friend class WaitQueue;
354 };
355 
// Groups per-document values (maxFieldLength, docID) with a weak reference back to the
// DocumentsWriter. NOTE(review): several members (source lines 365, 367-368, 370-371) are
// not shown in this view.
356 class DocState : public LuceneObject {
357 public:
358  DocState();
359  virtual ~DocState();
360 
362 
363 public:
    // Weak (non-owning) reference to the DocumentsWriter.
364  DocumentsWriterWeakPtr _docWriter;
366  int32_t maxFieldLength;
369  int32_t docID;
372 
373 public:
    // NOTE(review): purpose not documented in this view; presumably a test hook - confirm.
375  virtual bool testPoint(const String& name);
376 
377  void clear();
378 };
379 
// RAMFile buffer for DocWriters.
381 class PerDocBuffer : public RAMFile {
382 public:
383  PerDocBuffer(const DocumentsWriterPtr& docWriter);
384  virtual ~PerDocBuffer();
385 
387 
388 protected:
    // Weak (non-owning) reference to the DocumentsWriter.
389  DocumentsWriterWeakPtr _docWriter;
390 
391 public:
    // NOTE(review): original doc comment not shown in this view; presumably hands the
    // buffer's blocks back for reuse - confirm against the implementation.
393  void recycle();
394 
395 protected:
    // Virtual buffer-allocation hook; takes the requested size and returns a ByteArray.
397  virtual ByteArray newBuffer(int32_t size);
398 };
399 
// Consumer returns this on each doc. This holds any state that must be flushed
// synchronized (NOTE(review): original description is truncated after "in docID o...",
// presumably "in docID order" - confirm).
402 class DocWriter : public LuceneObject {
403 public:
404  DocWriter();
405  virtual ~DocWriter();
406 
408 
409 public:
    // Link to another DocWriter; assigned through setNext().
410  DocWriterPtr next;
411  int32_t docID;
412 
413 public:
    // Pure virtual operations every concrete DocWriter must implement.
414  virtual void finish() = 0;
415  virtual void abort() = 0;
416  virtual int64_t sizeInBytes() = 0;
417 
418  virtual void setNext(const DocWriterPtr& next);
419 };
420 
// The IndexingChain must define the getChain(DocumentsWriter) method which returns the
// DocConsumer (NOTE(review): remainder of the original description is truncated in this view).
423 class IndexingChain : public LuceneObject {
424 public:
425  virtual ~IndexingChain();
426 
428 
429 public:
    // Pure virtual: returns the DocConsumer for the given DocumentsWriter.
430  virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter) = 0;
431 };
432 
// NOTE(review): the class header line (source line 447) is missing from this view; the
// members below belong to DefaultIndexingChain, as the destructor name shows.
// This is the current indexing chain: DocConsumer / DocConsumerPerThread --> DocFieldProcessor / ...
448 public:
449  virtual ~DefaultIndexingChain();
450 
452 
453 public:
454  virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter);
455 };
456 
// Concrete DocWriter: provides the finish(), abort() and sizeInBytes() operations that
// DocWriter declares as pure virtual. NOTE(review): no description is available in this
// view; the name suggests it stands in for a skipped document - confirm.
457 class SkipDocWriter : public DocWriter {
458 public:
459  virtual ~SkipDocWriter();
460 
462 
463 public:
464  virtual void finish();
465  virtual void abort();
466  virtual int64_t sizeInBytes();
467 };
468 
// Queue of DocWriter instances associated with a DocumentsWriter (DocumentsWriter declares
// this class as a friend). NOTE(review): no description is available in this view, and the
// `waiting` collection (source line 480) is not shown.
469 class WaitQueue : public LuceneObject {
470 public:
471  WaitQueue(const DocumentsWriterPtr& docWriter);
472  virtual ~WaitQueue();
473 
475 
476 protected:
    // Weak (non-owning) reference to the DocumentsWriter.
477  DocumentsWriterWeakPtr _docWriter;
478 
479 public:
481  int32_t nextWriteDocID;
482  int32_t nextWriteLoc;
483  int32_t numWaiting;
484  int64_t waitingBytes;
485 
486 public:
487  void reset();
488  bool doResume();
489  bool doPause();
490  void abort();
491  bool add(const DocWriterPtr& doc);
492 
493 protected:
494  void writeDocument(const DocWriterPtr& doc);
495 };
496 
// NOTE(review): the class header line (source line 497) is missing from this view; the
// members below belong to ByteBlockAllocator, as the constructor name shows.
498 public:
499  ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize);
500  virtual ~ByteBlockAllocator();
501 
503 
504 protected:
    // Weak (non-owning) reference to the DocumentsWriter.
505  DocumentsWriterWeakPtr _docWriter;
506 
507 public:
508  int32_t blockSize;
510 
511 public:
    // Returns a byte block; trackAllocations presumably controls whether the allocation is
    // counted against the writer's RAM accounting - confirm against the implementation.
513  virtual ByteArray getByteBlock(bool trackAllocations);
514 
    // Two overloads: one takes a [start, end) style range into blocks (bounds semantics to
    // be confirmed), the other takes the whole collection.
516  virtual void recycleByteBlocks(Collection<ByteArray> blocks, int32_t start, int32_t end);
517  virtual void recycleByteBlocks(Collection<ByteArray> blocks);
518 };
519 
520 }
521 
522 #endif
Definition: ByteBlockPool.h:54
static const int32_t MAX_THREAD_STATE
Max # ThreadState instances; if there are more threads than this they share ThreadStates.
Definition: DocumentsWriter.h:69
void abort()
Called if we hit an exception at a bad time (when updating the index files) and must discard all curr...
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:367
int32_t numDocsInStore
Definition: DocumentsWriter.h:160
void doAfterFlush()
Reset after a flush.
static const int32_t CHAR_BLOCK_MASK
Definition: DocumentsWriter.h:143
void initSegmentName(bool onlyDocStore)
void setMaxBufferedDocs(int32_t count)
Set max buffered docs, which means we will flush by doc count instead of by RAM usage.
String closeDocStore()
Closes the current open doc stores and returns the doc store segment name. This returns null if there ...
void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms)
void bytesUsed(int64_t numBytes)
static const int32_t BYTE_BLOCK_NOT_MASK
Definition: DocumentsWriter.h:138
String maxTermPrefix
Definition: DocumentsWriter.h:371
void addDeleteTerm(const TermPtr &term, int32_t docCount)
void waitReady(const DocumentsWriterThreadStatePtr &state)
void setFlushedDocCount(int32_t n)
int32_t maxBufferedDeleteTerms
The max number of delete terms that can be buffered before they must be flushed to disk...
Definition: DocumentsWriter.h:85
static IndexingChainPtr getDefaultIndexingChain()
void setInfoStream(const InfoStreamPtr &infoStream)
If non-null, various details of indexing are printed here.
HashSet< String > _closedFiles
Definition: DocumentsWriter.h:172
bool flushPending
Definition: DocumentsWriter.h:162
boost::shared_ptr< InfoStream > InfoStreamPtr
Definition: LuceneTypes.h:532
AnalyzerPtr analyzer
Definition: DocumentsWriter.h:365
BufferedDeletesPtr deletesInRAM
Deletes done after the last flush; these are discarded on abort.
Definition: DocumentsWriter.h:79
boost::shared_ptr< Term > TermPtr
Definition: LuceneTypes.h:233
This class accepts multiple added documents and directly writes a single segment file. It does this more efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on those segments.
Definition: DocumentsWriter.h:54
boost::shared_ptr< Analyzer > AnalyzerPtr
Definition: LuceneTypes.h:20
static const int32_t OBJECT_HEADER_BYTES
Coarse estimates used to measure RAM usage of buffered deletes.
Definition: DocumentsWriter.h:113
WaitQueuePtr waitQueue
Definition: DocumentsWriter.h:174
int32_t numWaiting
Definition: DocumentsWriter.h:483
int32_t getNumBufferedDeleteTerms()
DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr &doc, const TermPtr &delTerm)
Returns a free (idle) ThreadState that may be used for indexing this one document. This call also pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the thread state has been acquired.
static const int32_t CHAR_NUM_BYTE
Definition: DocumentsWriter.h:116
boost::shared_ptr< OneMerge > OneMergePtr
Definition: LuceneTypes.h:192
int64_t numBytesUsed
Definition: DocumentsWriter.h:181
void createCompoundFile(const String &segment)
Build compound file for the segment we just flushed.
boost::shared_ptr< BufferedDeletes > BufferedDeletesPtr
Definition: LuceneTypes.h:87
IndexingChainPtr indexingChain
Definition: DocumentsWriter.h:157
int32_t maxFieldLength
Definition: DocumentsWriter.h:366
static const int32_t PER_DOC_BLOCK_SIZE
Definition: DocumentsWriter.h:152
void recycleIntBlocks(Collection< IntArray > blocks, int32_t start, int32_t end)
bool checkDeleteTerm(const TermPtr &term)
int32_t maxBufferedDocs
Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead...
Definition: DocumentsWriter.h:97
DocumentPtr doc
Definition: DocumentsWriter.h:370
static const int32_t INT_BLOCK_SIZE
Definition: DocumentsWriter.h:149
static const int32_t CHAR_BLOCK_SIZE
Definition: DocumentsWriter.h:142
HashSet< String > closedFiles()
int32_t docStoreOffset
Definition: DocumentsWriter.h:63
void setSimilarity(const SimilarityPtr &similarity)
bool pauseAllThreads()
Returns true if an abort is in progress.
boost::shared_ptr< IndexWriter > IndexWriterPtr
Definition: LuceneTypes.h:160
void bytesAllocated(int64_t numBytes)
IntArray getIntBlock(bool trackAllocations)
HashSet< String > openFiles()
Returns Collection of files in use by this instance, including any flushed segments.
RAMFile buffer for DocWriters.
Definition: DocumentsWriter.h:381
int32_t nextDocID
Definition: DocumentsWriter.h:65
static const int32_t INT_BLOCK_SHIFT
Initial chunks size of the shared int[] blocks used to store postings data.
Definition: DocumentsWriter.h:148
int32_t maxFieldLength
Definition: DocumentsWriter.h:166
boost::shared_ptr< WaitQueue > WaitQueuePtr
Definition: LuceneTypes.h:265
int64_t freeTrigger
If we've allocated 5% over our RAM budget, we then free down to 95%.
Definition: DocumentsWriter.h:93
DocFieldProcessorPtr docFieldProcessor
Definition: DocumentsWriter.h:76
boost::shared_ptr< SkipDocWriter > SkipDocWriterPtr
Definition: LuceneTypes.h:226
SimilarityPtr similarity
Definition: DocumentsWriter.h:167
int64_t waitingBytes
Definition: DocumentsWriter.h:484
boost::shared_ptr< DocumentsWriterThreadState > DocumentsWriterThreadStatePtr
Definition: LuceneTypes.h:124
boost::weak_ptr< DocumentsWriter > DocumentsWriterWeakPtr
Definition: LuceneTypes.h:123
HashSet< String > getFlushedFiles()
BufferedDeletesPtr deletesFlushed
Deletes done before the last flush; these are still kept on abort.
Definition: DocumentsWriter.h:82
void addDeleteDocID(int32_t docID)
Buffer a specific docID for deletion. Currently only used when we hit an exception when adding a docum...
static const int32_t INT_BLOCK_MASK
Definition: DocumentsWriter.h:150
bool bufferDeleteQuery(const QueryPtr &query)
boost::weak_ptr< IndexWriter > IndexWriterWeakPtr
Definition: LuceneTypes.h:160
static const int32_t BYTES_PER_DEL_QUERY
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount (say 24 bytes). Integer is OBJ_HEADER + INT.
Definition: DocumentsWriter.h:132
boost::shared_ptr< SegmentInfos > SegmentInfosPtr
Definition: LuceneTypes.h:210
boost::shared_ptr< DocFieldProcessor > DocFieldProcessorPtr
Definition: LuceneTypes.h:115
boost::shared_ptr< DocumentsWriter > DocumentsWriterPtr
Definition: LuceneTypes.h:123
int32_t nextWriteDocID
Definition: DocumentsWriter.h:481
void addDeleteQuery(const QueryPtr &query, int32_t docID)
String toMB(int64_t v)
void finishDocument(const DocumentsWriterThreadStatePtr &perThread, const DocWriterPtr &docWriter)
Does the synchronized work to finish/flush the inverted document.
HashSet< String > abortedFiles()
boost::shared_ptr< Document > DocumentPtr
Definition: LuceneTypes.h:74
String segment
Definition: DocumentsWriter.h:158
boost::shared_ptr< Query > QueryPtr
Definition: LuceneTypes.h:420
static const int32_t MAX_TERM_LENGTH
Definition: DocumentsWriter.h:145
bool updateDocument(const TermPtr &t, const DocumentPtr &doc, const AnalyzerPtr &analyzer)
void message(const String &message)
void setRAMBufferSizeMB(double mb)
Set how much RAM we can use before flushing.
Definition: DocumentsWriter.h:356
bool closed
Definition: DocumentsWriter.h:102
bool hasProx()
Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false.
boost::shared_ptr< ByteBlockAllocator > ByteBlockAllocatorPtr
Definition: LuceneTypes.h:88
File used as buffer in RAMDirectory.
Definition: RAMFile.h:15
DocConsumerPtr consumer
Definition: DocumentsWriter.h:169
int32_t docID
Definition: DocumentsWriter.h:411
boost::shared_ptr< IndexReader > IndexReaderPtr
Definition: LuceneTypes.h:157
boost::shared_ptr< Directory > DirectoryPtr
Definition: LuceneTypes.h:489
int32_t getNumDocsInRAM()
Returns how many docs are currently buffered in RAM.
int32_t blockSize
Definition: DocumentsWriter.h:508
static const int32_t CHAR_BLOCK_SHIFT
Initial chunk size of the shared char[] blocks used to store term text.
Definition: DocumentsWriter.h:141
ByteBlockAllocatorPtr perDocAllocator
Definition: DocumentsWriter.h:178
Base class for all Lucene classes.
Definition: LuceneObject.h:31
boost::shared_ptr< DocConsumer > DocConsumerPtr
Definition: LuceneTypes.h:106
Collection< ByteArray > freeByteBlocks
Definition: DocumentsWriter.h:509
friend class WaitQueue
Definition: DocumentsWriter.h:353
int64_t ramBufferSize
How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead...
Definition: DocumentsWriter.h:88
int32_t getMaxBufferedDeleteTerms()
String getDocStoreSegment()
Returns the current doc store segment we are writing to.
Collection< DocumentsWriterThreadStatePtr > threadStates
Definition: DocumentsWriter.h:70
DocumentsWriter(const DirectoryPtr &directory, const IndexWriterPtr &writer, const IndexingChainPtr &indexingChain)
void removeOpenFile(const String &name)
#define LUCENE_CLASS(Name)
Definition: LuceneObject.h:24
static const int32_t BYTES_PER_DEL_TERM
Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). BufferedDeletes.num is OBJ_HEADER + INT.
Definition: DocumentsWriter.h:123
Collection< CharArray > freeCharBlocks
Definition: DocumentsWriter.h:109
Definition: AbstractAllTermDocs.h:12
bool bufferDeleteTerms(Collection< TermPtr > terms)
bool setFlushPending()
Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter...
TermPtr lastDeleteTerm
Definition: DocumentsWriter.h:184
int32_t flushedDocCount
How many docs already flushed to index.
Definition: DocumentsWriter.h:100
Collection< IntArray > freeIntBlocks
Definition: DocumentsWriter.h:108
bool bufferDeleteTerm(const TermPtr &term)
Definition: DocumentsWriter.h:457
HashSet< String > _openFiles
Definition: DocumentsWriter.h:171
Definition: DocumentsWriter.h:469
int64_t freeLevel
Definition: DocumentsWriter.h:94
SimilarityPtr similarity
Definition: DocumentsWriter.h:368
void recycleCharBlocks(Collection< CharArray > blocks, int32_t numBlocks)
Definition: DocumentsWriter.h:497
void initFlushState(bool onlyDocStore)
static const int32_t BYTES_PER_DEL_DOCID
Rough logic: del docIDs are List<Integer>. Say list allocates ~2X size (2*POINTER). Integer is OBJ_HEADER + int.
Definition: DocumentsWriter.h:127
The IndexingChain must define the getChain(DocumentsWriter) method which returns the DocConsumer that...
Definition: DocumentsWriter.h:423
String docStoreSegment
Definition: DocumentsWriter.h:59
static const int32_t BYTE_BLOCK_MASK
Definition: DocumentsWriter.h:137
int32_t numDocsInRAM
Definition: DocumentsWriter.h:66
MapTermNum getBufferedDeleteTerms()
static const int32_t BYTE_BLOCK_SIZE
Definition: DocumentsWriter.h:136
ByteBlockAllocatorPtr byteBlockAllocator
Definition: DocumentsWriter.h:177
void updateFlushedDocCount(int32_t n)
int32_t flush(bool _closeDocStore)
Flush all pending docs to a new segment.
bool bufferDeleteQueries(Collection< QueryPtr > queries)
void setMaxFieldLength(int32_t maxFieldLength)
int32_t docID
Definition: DocumentsWriter.h:369
int64_t waitQueueResumeBytes
Definition: DocumentsWriter.h:90
SkipDocWriterPtr skipDocWriter
Definition: DocumentsWriter.h:175
boost::shared_ptr< IndexingChain > IndexingChainPtr
Definition: LuceneTypes.h:156
PerDocBufferPtr newPerDocBuffer()
Create and return a new DocWriterBuffer.
static const int32_t POINTER_NUM_BYTE
Definition: DocumentsWriter.h:114
bool aborting
Definition: DocumentsWriter.h:74
Consumer returns this on each doc. This holds any state that must be flushed synchronized "in docID o...
Definition: DocumentsWriter.h:402
bool addDocument(const DocumentPtr &doc, const AnalyzerPtr &analyzer)
Returns true if the caller (IndexWriter) should now flush.
boost::shared_ptr< Similarity > SimilarityPtr
Definition: LuceneTypes.h:435
boost::shared_ptr< PerDocBuffer > PerDocBufferPtr
Definition: LuceneTypes.h:199
int64_t numBytesAlloc
Definition: DocumentsWriter.h:180
int32_t pauseThreads
Definition: DocumentsWriter.h:73
SegmentWriteStatePtr flushState
Definition: DocumentsWriter.h:106
void addOpenFile(const String &name)
String getSegment()
Get current segment name we are writing.
DirectoryPtr directory
Definition: DocumentsWriter.h:156
void remapDeletes(const SegmentInfosPtr &infos, Collection< Collection< int32_t > > docMaps, Collection< int32_t > delCounts, const OneMergePtr &merge, int32_t mergeDocCount)
Called whenever a merge has completed and the merged segments had deletions.
virtual void initialize()
Called directly after instantiation to create objects that depend on this object being fully construc...
bool applyDeletes(const SegmentInfosPtr &infos)
This is the current indexing chain: DocConsumer / DocConsumerPerThread --> code: DocFieldProcessor / ...
Definition: DocumentsWriter.h:447
MapThreadDocumentsWriterThreadState threadBindings
Definition: DocumentsWriter.h:71
int32_t getDocStoreOffset()
Returns the doc offset into the shared doc store for the current buffered docs.
boost::shared_ptr< SegmentWriteState > SegmentWriteStatePtr
Definition: LuceneTypes.h:222
void balanceRAM()
We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds c...
int64_t waitQueuePauseBytes
Definition: DocumentsWriter.h:89
boost::shared_ptr< DocWriter > DocWriterPtr
Definition: LuceneTypes.h:125
static const int32_t INT_NUM_BYTE
Definition: DocumentsWriter.h:115
HashSet< String > _abortedFiles
List of files that were written before last abort()
Definition: DocumentsWriter.h:105
Collection< DocWriterPtr > waiting
Definition: DocumentsWriter.h:480
int32_t nextWriteLoc
Definition: DocumentsWriter.h:482
InfoStreamPtr infoStream
Definition: DocumentsWriter.h:165
bool bufferIsFull
Definition: DocumentsWriter.h:163
static const int32_t BYTE_BLOCK_SHIFT
Initial chunks size of the shared byte[] blocks used to store postings data.
Definition: DocumentsWriter.h:135

clucene.sourceforge.net