public abstract class SimilarityESA extends Esa
See also: cat.lump.ir.sim.EsaGenerator

| Modifier and Type | Field and Description |
|---|---|
protected EsaGenerator |
esaGen
A generator of ESA vectorial representations
|
protected java.lang.String |
objectA
Identifier for object A
|
protected java.lang.String |
objectB
Identifier for object B
|
protected boolean |
overrideObjects
Whether previously computed semantic representations
should be discarded (if they exist)
|
protected java.io.File |
textsA
Path to documents A
|
protected java.io.File |
textsB
Path to documents B
|
Fields inherited from class Esa: esaVectorsA, esaVectorsB, log

| Constructor and Description |
|---|
SimilarityESA(java.lang.String indexPath,
java.lang.String lan,
java.lang.Boolean overrideObjects)
Constructor.
|
| Modifier and Type | Method and Description |
|---|---|
protected abstract EsaVectors |
computeVectors(java.io.File documentsPath,
java.lang.String object,
java.lang.String set)
Computes the vectors for the texts in the given set.
|
void |
computeVectorsA()
Compute the characteristic vectors for dataset A
|
void |
computeVectorsB()
Compute the characteristic vectors for dataset B
|
protected boolean |
objectExists(java.io.File object,
java.lang.String id)
Checks whether the vector-representation object exists.
|
protected void |
saveObject(java.io.File objFile,
EsaVectors esa)
Saves a textual representation into an object file
|
protected abstract void |
setDocumentsPath(java.io.File documentsApath,
java.io.File documentsBpath)
A method that loads the texts in collections A and B.
|
protected void |
setObjects()
Set the name of the resulting vector objects
|
Methods inherited from class Esa: computePairwiseSimilarities, computeSimilarities, computeSimilarity, displaySimilarities, documentsExist, exitError, getPairwiseSimilarities, getSimilarities, getSimilaritiesMatrix, getSimilarity, getSimilarity

protected java.io.File textsA
protected java.io.File textsB
protected boolean overrideObjects
protected EsaGenerator esaGen
protected java.lang.String objectA
protected java.lang.String objectB
public SimilarityESA(java.lang.String indexPath,
java.lang.String lan,
java.lang.Boolean overrideObjects)
Parameters:
indexPath - path to Lucene's index
lan - language to work with
overrideObjects - whether previously computed vectors will be discarded

protected abstract void setDocumentsPath(java.io.File documentsApath,
java.io.File documentsBpath)
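Parameters:
documentsApath - (no description provided; presumably the path to the texts in collection A)
documentsBpath - (no description provided; presumably the path to the texts in collection B)

protected void setObjects()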
public void computeVectorsA()
throws java.lang.ClassNotFoundException,
java.io.IOException
Throws:
java.io.IOException
java.lang.ClassNotFoundException

public void computeVectorsB()
throws java.lang.ClassNotFoundException,
java.io.IOException
Throws:
java.io.IOException
java.lang.ClassNotFoundException

protected abstract EsaVectors computeVectors(java.io.File documentsPath, java.lang.String object, java.lang.String set) throws java.lang.ClassNotFoundException, java.io.IOException
Parameters:
documentsPath - path to the documents
object - name of the (previously generated) object
set - whether we are processing A or B
Throws:
java.io.IOException
java.lang.ClassNotFoundException

protected void saveObject(java.io.File objFile,
EsaVectors esa)
throws java.io.FileNotFoundException,
java.io.IOException
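Parameters:
objFile - (no description provided; presumably the object file to save into)
esa - (no description provided; presumably the representation to be saved)
Throws:
java.io.FileNotFoundException
java.io.IOException

protected boolean objectExists(java.io.File object,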
java.lang.String id)
Parameters:
object - object to check
id - flag to report