public abstract class SimilarityESA extends Esa
cat.lump.ir.sim.EsaGenerator#
Modifier and Type | Field and Description |
---|---|
protected EsaGenerator |
esaGen
A generator of ESA vectorial representations
|
protected java.lang.String |
objectA
Identifier for object A
|
protected java.lang.String |
objectB
Identifier for object B
|
protected boolean |
overrideObjects
Whether previously computed semantic representations
should be discarded (if they exist)
|
protected java.io.File |
textsA
Path to documents A
|
protected java.io.File |
textsB
Path to documents B
|
esaVectorsA, esaVectorsB, log
Constructor and Description |
---|
SimilarityESA(java.lang.String indexPath,
java.lang.String lan,
java.lang.Boolean overrideObjects)
Constructor.
|
Modifier and Type | Method and Description |
---|---|
protected abstract EsaVectors |
computeVectors(java.io.File documentsPath,
java.lang.String object,
java.lang.String set)
Computes the vectors for the texts in the given set.
|
void |
computeVectorsA()
Computes the characteristic vectors for dataset A
|
void |
computeVectorsB()
Computes the characteristic vectors for dataset B
|
protected boolean |
objectExists(java.io.File object,
java.lang.String id)
Checks whether the vector-representation object exists.
|
protected void |
saveObject(java.io.File objFile,
EsaVectors esa)
Saves a textual representation into an object file
|
protected abstract void |
setDocumentsPath(java.io.File documentsApath,
java.io.File documentsBpath)
A method that loads the texts in collections A and B.
|
protected void |
setObjects()
Set the name of the resulting vector objects
|
computePairwiseSimilarities, computeSimilarities, computeSimilarity, displaySimilarities, documentsExist, exitError, getPairwiseSimilarities, getSimilarities, getSimilaritiesMatrix, getSimilarity, getSimilarity
protected java.io.File textsA
protected java.io.File textsB
protected boolean overrideObjects
protected EsaGenerator esaGen
protected java.lang.String objectA
protected java.lang.String objectB
public SimilarityESA(java.lang.String indexPath, java.lang.String lan, java.lang.Boolean overrideObjects)
indexPath - path to Lucene's index
lan - language to work with
overrideObjects - whether previously computed vectors will be discarded
protected abstract void setDocumentsPath(java.io.File documentsApath, java.io.File documentsBpath)
documentsApath -
documentsBpath -
protected void setObjects()
public void computeVectorsA() throws java.lang.ClassNotFoundException, java.io.IOException
java.io.IOException
java.lang.ClassNotFoundException
public void computeVectorsB() throws java.lang.ClassNotFoundException, java.io.IOException
java.io.IOException
java.lang.ClassNotFoundException
protected abstract EsaVectors computeVectors(java.io.File documentsPath, java.lang.String object, java.lang.String set) throws java.lang.ClassNotFoundException, java.io.IOException
documentsPath - path to the documents
object - name of the (previously generated) object
set - whether we are processing A or B
java.io.IOException
java.lang.ClassNotFoundException
protected void saveObject(java.io.File objFile, EsaVectors esa) throws java.io.FileNotFoundException, java.io.IOException
objFile -
esa -
java.io.FileNotFoundException
java.io.IOException
protected boolean objectExists(java.io.File object, java.lang.String id)
object - object to check
id - flag to report