com.knowledgebooks.utils
Class Document

java.lang.Object
  extended by com.knowledgebooks.utils.Document

public class Document
extends java.lang.Object

Utilities for finding sentence breaks in documents.

Copyright 2002-2008 by Mark Watson. All rights reserved.

This software is not public domain. It can be legally used under either of the following licenses:

1. KnowledgeBooks.com Non Commercial Royalty Free License
2. KnowledgeBooks.com Commercial Use License

See www.knowledgebooks.com for details.


Field Summary
 int[] endSentenceBoundary
           
 int[] startSentenceBoundary
           
 
Constructor Summary
Document(java.util.List<java.lang.String> words)
           
Document(java.lang.String words)
           
 
Method Summary
 int getNumSentences()
           
 int getNumWords()
           
 java.lang.String getSentence(int index)
           
 IPair getSentenceBoundary(int sentenceIndex)
           
 IPair getSentenceBoundaryFromWordIndex(int wordIndex)
           
 java.util.List<java.lang.String> getTokens()
           
 java.lang.String getWord(int wordIndex)
           
 java.lang.String toString()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

startSentenceBoundary

public int[] startSentenceBoundary

endSentenceBoundary

public int[] endSentenceBoundary
Constructor Detail

Document

public Document(java.lang.String words)
Parameters:
words - a string containing plain text

Document

public Document(java.util.List<java.lang.String> words)
Parameters:
words - a list of string tokens
Method Detail

toString

public java.lang.String toString()
Overrides:
toString in class java.lang.Object

getTokens

public java.util.List<java.lang.String> getTokens()
Returns:
a list of string tokens in this document

getNumWords

public int getNumWords()
Returns:

getNumSentences

public int getNumSentences()
Returns:

getWord

public java.lang.String getWord(int wordIndex)
Parameters:
wordIndex -
Returns:

getSentenceBoundaryFromWordIndex

public IPair getSentenceBoundaryFromWordIndex(int wordIndex)
Parameters:
wordIndex -
Returns:

getSentenceBoundary

public IPair getSentenceBoundary(int sentenceIndex)
Parameters:
sentenceIndex -
Returns:

getSentence

public java.lang.String getSentence(int index)
Parameters:
index - sentence index in document
Returns:
a string containing the specified sentence