Collection

API reference for functions that retrieve details and other information (such as inferred entities and themes) from a collection of documents that has been analyzed.

lxaGetCollectionDetails

Summary: Retrieves information about the collection. At present, the SalienceCollectionDetails structure contains only the number of documents in the collection.

Unlike the other collection functions, the results of this function do not need to be released by calling a corresponding free function.

Returns: Integer return code. To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionDetails(SalienceSession *pSession, SalienceCollectionDetails *pDetails, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pDetails

Pointer to a SalienceCollectionDetails structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get collection details
    SalienceCollectionDetails oCollectionDets;
    lxaGetCollectionDetails(pSession, &oCollectionDets, "");

    printf("Number of documents: %d\n", oCollectionDets.nSize);
    
    lxaCloseSalienceSession(pSession);

lxaGetCollectionThemes

Summary: Retrieves the themes extracted across all documents in the collection. These results can be adjusted through the available Collection Options, which must be set before this call is made.

The acConfigurationID parameter specifies the configuration for the results, which are returned in a SalienceThemeList structure. Configurations are defined using the method lxaAddSalienceConfiguration.

After use you should free the allocated memory by calling lxaFreeThemeList.

Returns: Integer return code. To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionThemes(SalienceSession *pSession, SalienceThemeList *pResult, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pResult

Pointer to a SalienceThemeList structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with 
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get collection Themes
    SalienceThemeList oCollectionThemes;
    lxaGetCollectionThemes(pSession, &oCollectionThemes, "");

    for(int i=0; i < oCollectionThemes.nLength; i++) {
        printf("#: %d theme: %s\n", i, oCollectionThemes.pThemes[i]);
    }

    // Free memory used by lxaGetCollectionThemes and close SalienceSession
    lxaFreeThemeList(&oCollectionThemes);
    lxaCloseSalienceSession(pSession);

lxaGetCollectionFacets

Summary: Retrieves the facets extracted across all documents in the collection. These results can be adjusted through the available Collection Options, which must be set before this call is made.

The acConfigurationID parameter specifies the configuration for the results, which are returned in a SalienceFacetList structure. Configurations are defined using the method lxaAddSalienceConfiguration.

After use you should free the allocated memory by calling lxaFreeFacetList.

Returns: Integer return code. To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionFacets(SalienceSession *pSession, SalienceFacetList *pResults, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pResults

Pointer to a SalienceFacetList structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get collection facets
    SalienceFacetList oCollectionFacets;
    lxaGetCollectionFacets(pSession, &oCollectionFacets, "");

    for(int i=0; i < oCollectionFacets.nLength; i++) {
        printf("#: %d facet: %s\n", i, oCollectionFacets.pFacets[i]);
    }

    // Free memory used by lxaGetCollectionFacets and close SalienceSession
    lxaFreeFacetList(&oCollectionFacets);
    lxaCloseSalienceSession(pSession);

lxaGetCollectionConceptDefinedTopics

Summary: Retrieves the conceptual topics determined for the collection. Before calling this method, you must specify the topic list using the Topic List option.

The acConfigurationID parameter specifies the configuration for the results, which are returned in a SalienceTopicList structure. Configurations are defined using the method lxaAddSalienceConfiguration.

After use you should free the allocated memory by calling lxaFreeTopicList.

Returns: Integer return code. To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionConceptDefinedTopics(SalienceSession *pSession, SalienceTopicList *pResult, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pResult

Pointer to a SalienceTopicList structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get concept defined topics from a collection
    SalienceTopicList oCollectionTopics;
    lxaGetCollectionConceptDefinedTopics(pSession, &oCollectionTopics, "USER_CONFIG");

    for(int i=0; i < oCollectionTopics.nLength; i++) {
        printf("#: %d topic: %s\n", i, oCollectionTopics.pTopics[i]);
    }

    // Free memory used by lxaGetCollectionConceptDefinedTopics and close SalienceSession
    lxaFreeTopicList(&oCollectionTopics);
    lxaCloseSalienceSession(pSession);

lxaGetCollectionQueryDefinedTopics

Summary: Retrieves the topics determined for the collection via user-defined queries. Before calling this method, you must specify the topic list using the Topic List option.

The acConfigurationID parameter specifies the configuration for the results, which are returned in a SalienceTopicList structure. Configurations are defined using the method lxaAddSalienceConfiguration.

After use you should free the allocated memory by calling lxaFreeTopicList.

Returns: Integer return code. To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionQueryDefinedTopics(SalienceSession *pSession, SalienceTopicList *pResult, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pResult

Pointer to a SalienceTopicList structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with 
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get query defined topics from a collection
    SalienceTopicList oCollectionTopics;
    lxaGetCollectionQueryDefinedTopics(pSession, &oCollectionTopics, "");

    for(int i=0; i < oCollectionTopics.nLength; i++) {
        printf("#: %d topic: %s\n", i, oCollectionTopics.pTopics[i]);
  }

    // Free memory used by lxaGetCollectionQueryDefinedTopics and close SalienceSession
    lxaFreeTopicList(&oCollectionTopics);
    lxaCloseSalienceSession(pSession);

lxaGetCollectionEntities

Summary: Retrieves the entities from the collection based on model-based or datafile-based entity extraction. Parameters to control entity extraction should be specified by setting Entity Options. Other adjustments can be made through the available Collection Options, which must be set before this call is made.

Sets the pEntities member of the passed-in SalienceCollectionEntityList to point to an array of SalienceCollectionEntity structures. The nLength member is set to the length of this array.

The acConfigurationID parameter specifies the configuration for the results. Configurations are defined using the method lxaAddSalienceConfiguration.

After use you should free the allocated memory by calling lxaFreeCollectionEntityList.

Returns: Integer return code. LXA_OK when entity extraction completes successfully. When entity extraction completes with non-fatal issues LXA_OK_WITH_WARNINGS is returned.

To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionEntities(SalienceSession *pSession, SalienceCollectionEntityList *pResults, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pResults

Pointer to a SalienceCollectionEntityList structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get entities from a collection
    SalienceCollectionEntityList oCollectionEntities;
    lxaGetCollectionEntities(pSession, &oCollectionEntities, "");

    for(int i=0; i < oCollectionEntities.nLength; i++) {
        printf("#: %d entity: %s\n", i, oCollectionEntities.pEntities[i]);
    }

    // Free memory used by lxaGetCollectionEntities and close SalienceSession
    lxaFreeCollectionEntityList(&oCollectionEntities);
    lxaCloseSalienceSession(pSession);

lxaGetCollectionUserEntities

Summary: Retrieves the user-defined entities from the collection. This is based on the entity list specified through the User Entity List option. Other adjustments can be made through the available Collection Options, which must be set before this call is made.

Sets the pEntities member of the passed-in SalienceCollectionEntityList to point to an array of SalienceCollectionEntity structures. The nLength member is set to the length of this array.

The acConfigurationID parameter specifies the configuration for the results. Configurations are defined using the method lxaAddSalienceConfiguration.

After use you should free the allocated memory by calling lxaFreeCollectionEntityList.

Returns: Integer return code. LXA_OK when entity extraction completes successfully. When entity extraction completes with non-fatal issues LXA_OK_WITH_WARNINGS is returned.

To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetCollectionUserEntities(SalienceSession *pSession, SalienceCollectionEntityList *pResults, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pResults

Pointer to a SalienceCollectionEntityList structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* Assuming a SalienceSession (*pSession) has already been opened with
    a valid LexalyticsLicense */

    acText1 = "This is your text.";
    acText2 = "This is also your text.";

    vector<char*> myVector(0);
    myVector.push_back(acText1);
    myVector.push_back(acText2);

    int nSize = myVector.size();

    // Initialize Collection
    SalienceCollection oCollection;
    oCollection.acCollectionName = "MyCollection";
    oCollection.nSize = nSize;
    oCollection.pDocuments =
        (SalienceCollectionDocument*)malloc(sizeof(SalienceCollectionDocument) * nSize);

    //Add text data to collection
    for(int i=0; i < nSize; i++) {
        oCollection.pDocuments[i].acText = myVector[i];
        oCollection.pDocuments[i].acIndentifier = "myDoc";
        oCollection.pDocuments[i].nIsText = 1;
        oCollection.pDocuments[i].nSplitByLine = 0;
    }

    // Prepare collection
    if (lxaPrepareCollection(pSession, &oCollection) != LXA_OK) {
        return 1;
    }

    // Get user defined entities from a collection
    SalienceCollectionEntityList oCollectionEntities;
    lxaGetCollectionUserEntities(pSession, &oCollectionEntities, "");

    for(int i=0; i < oCollectionEntities.nLength; i++) {
        printf("#: %d entity: %s\n", i, oCollectionEntities.pEntities[i]);
    }

    // Free memory used by lxaGetCollectionUserEntities and close SalienceSession
    lxaFreeCollectionEntityList(&oCollectionEntities);
    lxaCloseSalienceSession(pSession);

lxaGetNamedEntityMarkup

Summary: Provides a structured representation of the document with annotation of entities, defined by the user, identified within the text. The SalienceDocument structure contains a set of SalienceSentence structures, which contain SalienceWord structures that contain information about the entities within the document.

The acConfigurationID parameter specifies the configuration for the results. Configurations are defined using the method lxaAddSalienceConfiguration.

After use, you should free allocated memory by calling lxaFreeDocument.

Returns: Integer return code. LXA_OK when entity extraction completes successfully. When entity extraction completes with non-fatal issues LXA_OK_WITH_WARNINGS is returned.

To retrieve more information on the return code use lxaGetLastWarnings on the SalienceSession structure.

More information on errors and warnings can be found in the Errors and Warning Codes section of our documentation.

Function Signature:
int lxaGetNamedEntityMarkup(SalienceSession *pSession, SalienceDocument *pDocument, const char *acConfigurationID);

Parameter

Description

pSession

Pointer to a SalienceSession structure previously returned by a call to lxaOpenSalienceSession

pDocument

Pointer to a SalienceDocument structure that will get filled in by the call

acConfigurationID

Character string specifying the configuration for the results, blank for the default configuration

Example:
Use lxaLoadLicense to create a LexalyticsLicense structure. That structure is then passed to lxaOpenSalienceSession, which starts a session with Salience.

/* This code example assumes a SalienceSession has already been created with a valid
   Lexalytics license. */

  std::string sMarkup = "";
  int nPrimaryId = -1;
  int nSecondaryId = -1;
  std::string sType = "";

    // Call to lxaGetNamedEntityMarkup and store in oDocument
  SalienceDocument oDocument;
  lxaGetNamedEntityMarkup(pSession, &oDocument, "");

    // Convert to markup for named entities
  for(size_t i = 0; i < oDocument.nSentenceCount; i++) {
    for(size_t j = 0; j < oDocument.pSentences[i].nLength; j++) {
      if(oDocument.pSentences[i].pTokens[j].nId == -1) {
        if(nPrimaryId != -1) {
          sMarkup += "</";
          sMarkup += sType;
          sMarkup += ">";
          nPrimaryId = -1;
          nSecondaryId = -1;
        }

        if(oDocument.pSentences[i].pTokens[j].nPostFixed == 0) {
          sMarkup += " ";
        }

        sMarkup += oDocument.pSentences[i].pTokens[j].acToken;
      }

      else {
        if(nPrimaryId == oDocument.pSentences[i].pTokens[j].nId && 
           nSecondaryId == oDocument.pSentences[i].pTokens[j].nSecondaryId) {
          if(oDocument.pSentences[i].pTokens[j].nPostFixed == 0) {
            sMarkup += " ";
          }

          sMarkup += oDocument.pSentences[i].pTokens[j].acToken;
        }

        else {
          if(nPrimaryId != -1) {
            sMarkup += "</";
            sMarkup += sType;
            sMarkup += ">";
          }

          if(oDocument.pSentences[i].pTokens[j].nPostFixed == 0) {
            sMarkup += " ";
          }

          nPrimaryId = oDocument.pSentences[i].pTokens[j].nId;
          nSecondaryId = oDocument.pSentences[i].pTokens[j].nSecondaryId;
          sType = oDocument.pSentences[i].pTokens[j].acEntityType;
          sMarkup += "<";
          sMarkup += sType;
          sMarkup += ">";
          sMarkup += oDocument.pSentences[i].pTokens[j].acToken;
        }
      }
    }
  }

  if(nPrimaryId != -1) {
    sMarkup += "</";
    sMarkup += sType;
    sMarkup += ">";
    nPrimaryId = -1;
    nSecondaryId = -1;
  }

    // Send to standard output stream and free memory
  std::cout << sMarkup << std::endl;
  lxaFreeDocument(&oDocument);