/*!
* @class IndexQGram
*
* @implements StringTrieConcept
*
* @extends Index
*
* @headerfile <seqan/index.h>
*
* @brief An index based on an array of sorted q-grams.
*
* @signature template <typename TIndex, typename TShapeSpec, typename TSpec>
* class Index<TText, IndexQGram<TShapeSpec[, TSpec]> >;
*
* @tparam TSpec The specializing type. Types: @link
* OpenAdressingTags#OpenAdressing @endlink, Default: void
* @tparam TText The text type. Types: @link String @endlink
* @tparam TShapeSpec The @link Shape @endlink specialization type.
*
* The fibres (see @link Index @endlink and @link Fibre @endlink) of this index
* are a suffix array sorted by the first <i>q</i> characters (see @link
* QGramIndexFibres#QGramSA @endlink) and a <i>q</i>-gram directory (see @link
* QGramIndexFibres#QGramDir @endlink).
*
* The size of the q-gram directory is <i>Σ<sup>q</sup></i>. On a 32 bit
* system the <i>q</i>-gram length is limited to 3 for <tt>char</tt> alphabets
* or 13-14 for @link Dna @endlink alphabets.
*
* Consider to use the @link OpenAddressingQGramIndex @endlink for longer
* <i>q</i>-grams if you don't need <i>q</i>-grams to be sorted.
*
* @see QGramIndexFibres
*
* @fn IndexQGram::Index
*
* @brief Constructor
*
* @signature Index::Index();
* @signature Index::Index(index);
* @signature Index::Index(text[, shape]);
*
* @param[in] index Other Index object to copy from.
* @param[in] text The text to be indexed.
* @param[in] shape The q gram @link Shape @endlink to be applied. (optional)
*
* @fn IndexQGram#indexSA
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>getFibre(.., QGramSA)</tt>.
*
* @signature TSa indexSA(index);
*
* @param[in] index The @link IndexQGram @endlink object holding the fibre.
*
* @return TSa A reference to the @link QGramIndexFibres#QGramSA @endlink fibre
* (q-gram positions).
*
* @fn IndexQGram#saAt
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>value(indexSA(..), ..)</tt>.
*
* @note Advanced functionality, not commonly used.
*
* @signature TValue saAt(position, index);
*
* @param[in] index The @link IndexQGram @endlink object holding the fibre.
* @param[in] position A position in the array on which the value should be
* accessed.
*
* @return TValue A reference or proxy to the value in the @link
* QGramIndexFibres#QGramSA @endlink fibre. To be more precise, a
* reference to a position containing a value of type @link
* SAValue @endlink is returned (or a proxy).
*
* @fn IndexQGram#indexDir
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>getFibre(.., QGramDir())</tt>.
*
* @signature TFibre indexDir(index);
*
* @param[in] index The @link IndexQGram @endlink object holding the fibre.
*
* @return TFibre A reference to the @link QGramIndexFibres#QGramDir @endlink
* fibre (q-gram directory).
*
* @fn IndexQGram#dirAt
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>value(indexDir(index), position)</tt>.
*
* @signature TFibre dirAt(position, index);
*
* @param[in] index The IndexQGram object holding the fibre.
* @param[in] position A position in the array on which the value should be
* accessed.
*
* @return TFibre A reference to the @link QGramIndexFibres#QGramDir @endlink
* fibre.
*
* @fn IndexQGram#indexCounts
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>getFibre(index, QGramCounts())</tt>.
*
* @signature TFibre indexCounts(index);
*
* @param[in] index The @link IndexQGram @endlink object holding the fibre.
*
* @return TFibre A reference to the @link QGramIndexFibres#QGramCounts @endlink
* fibre (counts array).
*
* @fn IndexQGram#indexCountsDir
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>getFibre(index, QGramCountsDir())</tt>.
*
* @signature TFibre indexCountsDir(index);
*
* @param[in] index The IndexQGram object holding the fibre.
*
* @return TFibre A reference to the @link QGramIndexFibres#QGramCountsDir
* @endlink fibre (counts directory).
*
* @fn IndexQGram#indexBucketMap
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>getFibre(index, QGramBucketMap())</tt>.
*
* @signature TFibre indexBucketMap(index);
*
* @param[in] index The IndexQGram object holding the fibre.
*
* @return TFibre A reference to the @link QGramIndexFibres#QGramBucketMap
* @endlink fibre (maps q-gram hashes to buckets).
*
* @fn IndexQGram#indexShape
*
* @headerfile <seqan/index.h>
*
* @brief Shortcut for <tt>getFibre(index, QGramShape())</tt>.
*
* @signature TFibre indexShape(index);
*
* @param[in] index The @link Index @endlink object holding the fibre. Types:
* @link IndexQGram @endlink
*
* @return TFibre Returns a reference to the @link Shape @endlink object of a
* q-gram index. Formally, this is a reference to the @link
* QGramIndexFibres#QGramShape @endlink fibre.
*
* @fn IndexQGram#getStepSize
*
* @headerfile <seqan/index.h>
*
* @brief Return the <i>q</i>-gram step size used for index creation.
*
* @signature TSize getStepSize(index);
*
* @param[in] index A IndexQGram object.
*
* @return TSize The step size of type @link Index#Size @endlink. If <i>x</i> is
* returned every <i>x</i><sup>th</sup> <i>q</i>-gram is stored in
* the index.
*
* @see IndexQGram#setStepSize
*
* @fn IndexQGram#setStepSize
*
* @headerfile <seqan/index.h>
*
* @brief Change the <i>q</i>-gram step size used for index creation.
*
* @signature void setStepSize(index, stepSize);
*
* @param[in,out] index The IndexQGram to modify.
* @param[in] stepSize The step size, @link IntegerConcept @endlink; Each
* <tt>stepSize</tt><sup>th</sup> <i>q</i>-gram will be
* stored.
*
* The default step size of a <i>q</i>-gram index is 1, which corresponds to all
* overlapping <i>q</i>-grams. To take effect of changing the <tt>stepSize</tt>
* the <i>q</i>-gram index should be empty or recreated.
*
* A <tt>stepSize</tt> of 0 corresponds to <tt>stepSize =
* length(indexShape(index))</tt>, i.e. all non-overlapping q-grams.
*
* @see IndexQGram#getStepSize
*
* @fn IndexQGram#createQGramIndex
*
* @headerfile <seqan/index.h>
*
* @brief Builds a <i>q</i>-gram index on a sequence.
*
* @warning This function should not be called directly. Please use @link
* Index#indexCreate @endlink or @link Index#indexRequire @endlink. The
* resulting tables must have appropriate size before calling this
* function.
*
* @signature void createQGramIndex(index);
* @signature void createQGramIndex(sa, dir, bucketMap, text, shape, stepSize);
* [DEPRECATED]
*
* @param[out] index The IndexQGram to create.
* @param[out] sa The resulting list in which all <i>q</i>-grams are sorted
* alphabetically.
* @param[out] dir The resulting array that indicates at which position in index
* the corresponding <i>q</i>-grams
* @param[in] bucketMap Stores the <i>q</i>-gram hashes for the openaddressing
* hash maps, see @link IndexQGram#indexBucketMap @endlink.
* If bucketMap is of the type @link Nothing @endlink the
* <i>q</i>-gram hash determines the bucket address in the
* index.
* @param[in] text The @link TextConcept @endlink object to build the index for.
* @param[in] shape The shape to be used. Types: @link Shape @endlink can be
* found.
* @param[in] stepSize Store every <tt>stepSize</tt>'th <i>q</i>-gram in the
* index, @link IntegerConcept @endlink.
*
* The resulting <i>q</i>-gram <tt>index</tt> contains the sorted list of
* qgrams. For each <i>q</i>-gram <tt>dir</tt> contains the first position in
* index that corresponds to this <i>q</i>-gram.
*
* @fn IndexQGram#createQGramIndexSAOnly
*
* @headerfile <seqan/index.h>
*
* @brief Builds the suffix array of a q-gram index on a sequence.
*
* @warning This function should not be called directly. Please use @link
* Index#indexCreate @endlink or @link Index#indexRequire @endlink. The
* resulting tables must have appropriate size before calling this
* function.
*
* @signature void createQGramIndexSAOnly(sa, text, shape, stepSize)
*
* @param[out] sa The resulting list in which all q-grams are sorted
* alphabetically, @link String @endlink object of @link SAValue
* @endlink of the underlying @link TextConcept text @endlink.
* @param[in] text The sequence, a @link TextConcept @endlink object.
* @param[in] shape The @link Shape @endlink to be used. q is the length of this
* shape.
* @param[in] stepSize Store every <tt>stepSize</tt>'th q-gram in the index,
* @link IntegerConcept @endlink.
*
* @fn IndexQGram#createQGramIndexDirOnly
*
* @headerfile <seqan/index.h>
*
* @brief Builds the directory of a <i>q</i>-gram index on a sequence.
*
* @warning This function should not be called directly. Please use @link
* Index#indexCreate @endlink or @link Index#indexRequire @endlink. The
* resulting tables must have appropriate size before calling this
* function.
*
* @signature void createQGramIndexDirOnly(dir, bucketMap, text, shape,
* stepSize);
*
* @param[out] dir The resulting array that indicates at which position in index
* the corresponding q-grams can be found.
* @param[out] bucketMap Stores the q-gram hashes for the openaddressing hash
* maps, see @link IndexQGram#indexBucketMap @endlink. If
* bucketMap is of the type @link Nothing @endlink the
* q-gram hash determines the bucket address in the index.
* @param[in] text The sequence, @link TextConcept @endlink.
* @param[in] stepSize Store every <tt>stepSize</tt>'th q-gram in the index,
* @link IntegerConcept @endlink.
* @param[in] shape The @link Shape @endlink to be used.
*
* The resulting <tt>index</tt> contains the sorted list of qgrams. For each
* possible q-gram pos contains the first position in index that corresponds to
* this q-gram.
*
* @fn IndexQGram#createCountArray
*
* @headerfile <seqan/index.h>
*
* @brief Builds an index on a StringSet storing how often a <i>q</i>-gram
* occurs in each sequence.
*
* @warning This function should not be called directly. Please use @link
* Index#indexCreate @endlink or @link Index#indexRequire @endlink. The
* resulting tables must have appropriate size before calling this
* function.
*
* @signature void createCountsArray(counts, dir, bucketMap, stringSet, shape,
* stepSize);
*
* @param[out] counts The resulting @link String @endlink of pairs <i>(seqNo,
* count)</i>.
* @param[out] dir The resulting array that indicates at which position in the
* count table the corresponding a certain q-gram can be found.
* @param[out] bucketMap Stores the q-gram hashes for the openaddressing hash
* maps, see @link IndexQGram#indexBucketMap @endlink. If
* bucketMap is of the type @link Nothing @endlink the
* q-gram hash determines the bucket address in the index.
* @param[in] stringSet The @link StringSet @endlink.
* @param[in] shape The @link Shape @endlink to be used.
* @param[in] stepSize Store every <tt>stepSize</tt><sup>th</sup> <i>q</i>-gram
* in the index, @link IntegerConcept @endlink.
*
* @fn IndexQGram#getKmerSimilarityMatrix
*
* @headerfile <seqan/index.h>
*
* @brief Creates a matrix storing the number of common q-grams between all
* pairs of sequences.
*
* @signature void getKmerSimilarityMatrix(index, distMat[, seqSet]);
*
* @param[out] distMat The resulting q-gram similarity matrix. Types: @link
* ContainerConcept @endlink
* @param[in] seqSet Contains sequence numbers if only a subset of sequences
* should be compared. Types: @link ContainerConcept @endlink
* @param[in] index The @link IndexQGram @endlink to use.
*
* <tt>distMat</tt> need to be a container of a floating point type and will be
* resized to <tt>seqCount * seqCount</tt>, where <tt>seqCount</tt> is the
* number of sequences in the index/in <tt>seqSet</tt>. The fraction of common
* q-grams between sequence <tt>i</tt> and <tt>j</tt> is stored at position
* <tt>i*seqCount + j</tt>. It sums up the minimum number of q-gram occurrences
* between both sequences for each q-gram and normalizes it.
*
* @fn IndexQGram#range
*
* @headerfile <seqan/index.h>
*
* @brief Returns the suffix array interval borders of a q-gram in the index
* text.
*
* @signature TPair range(index, shape);
*
* @param[in] index The IndexQGram object to query.
* @param[in] shape The @link Shape @endlink object to use for the query.
*
* @return TPair All positions where the q-gram stored in <tt>shape</tt> occurs
* in the text (see @link QGramIndexFibres#QGramText @endlink) are
* stored in a contiguous range of the suffix array.
* <tt>range</tt> returns begin and end position of this range. If
* the type of <tt>index</tt> is <tt>TIndex</tt> the return type
* is <tt>Pair<Size<TIndex>::Type></tt>.
*
* The necessary index tables are built on-demand via @link Index#indexRequire
* @endlink if index is not <tt>const</tt>.
*
* @fn IndexQGram#getOccurrence
*
* @headerfile <seqan/index.h>
*
* @brief Returns an occurrence a q-gram in the index text.
*
* @signature TSAValue getOccurrence(index, shape);
*
* @param[in] index The IndexQGram object to query.
* @param[in] shape The @link Shape @endlink object to use for the query. The
* shape stores the <i>q</i>-gram of the last call ot @link
* Shape#hash @endlink or @link Shape#hashNext @endlink.
*
* @return TSAValue A position where the <i>q</i>-gram stored in <tt>shape</tt>
* occurs in the text (see @link QGramIndexFibres#QGramText
* @endlink). Type: @link SAValue @endlink of the index type.
*
* @fn IndexQGram#getOccurrences
*
* @headerfile <seqan/index.h>
*
* @brief Returns all occurrences of a q-gram in the index text.
*
* @signature TSAInfix getOccurrences(index, shape);
*
* @param[in] index A IndexQGram to query.
* @param[in] shape A @link Shape @endlink object.
*
* @return TSAInfix All positions where the q-gram stored in <tt>shape</tt>
* occurs in the text (see @link QGramIndexFibres#QGramText
* @endlink). Tupes: @link SegmentableConcept#Infix
* @endlink<@link Fibre @endlink<TIndex,
* QGramSA>::Type>::Type>.
*
* The necessary index tables are built on-demand via @link Index#indexRequire
* @endlink if index is not <tt>const</tt>.
*
* @see DemoMummy
* @see DemoSupermaximalRepeats
* @see DemoMaximalUniqueMatches
* @see VSTreeIterator#isUnique
* @see VSTreeIterator#getFrequency
* @see VSTreeIterator#isPartiallyLeftExtensible
* @see VSTreeIterator#isLeftMaximal
*
* @fn IndexQGram#countOccurrences
*
* @headerfile <seqan/index.h>
*
* @brief Returns the number of occurrences of a q-gram in the index text.
*
* @signature TSize countOccurrences(index, shape);
*
* @param[in] index IndexQGram object to query.
* @param[in] shape A @link Shape @endlink object to use for the query.
*
* @return TSize The number of positions where the q-gram stored in
* <tt>shape</tt> occurs in the text (see @link
* QGramIndexFibres#QGramText @endlink). Metafunction: @link
* Index#Size @endlink.
*
* The necessary index tables are built on-demand via @link Index#indexRequire
* @endlink if index is not <tt>const</tt>.
*
* Demo: Demo.Supermaximal Repeats
*
* Demo: Demo.Index countChildren
*
* @fn IndexQGram#countOccurrencesMultiple
*
* @headerfile <seqan/index.h>
*
* @brief Returns the number of occurrences of a q-gram for every sequence of a
* @link StringSet @endlink .
*
* @signature TCountInfix countOccurrencesMultiple(index, shape);
*
* @param[in] index A IndexQGram of a @link StringSet @endlink.
* @param[in] shape A @link Shape @endlink.
*
* @return TCountInfix A sequence of @link Pair pairs @endlink (seqNo,count),
* count > 0. For every @link StringSet @endlink sequence
* the q-gram occurs in, seqNo is the sequence number and
* count the number of occurrences. If the type of
* <tt>index</tt> is <tt>TIndex</tt> the return type is
* <tt>Infix<Fibre<TIndex, QGramCounts>::Type
* const>::Type</tt>.
*
* The necessary index tables are built on-demand via @link Index#indexRequire
* @endlink if index is not <tt>const</tt>.
*
* @see VSTreeIterator#countOccurrences
*/