Functions to parse, convert, manipulate, create, and compare (nucleic acid sequence) strings. More...
Functions to parse, convert, manipulate, create, and compare (nucleic acid sequence) strings.
Files | |
file | strings.h |
General utility- and helper-functions for RNA sequence and structure strings used throughout the ViennaRNA Package. | |
Macros | |
#define | XSTR(s) STR(s) |
Stringify a macro after expansion. | |
#define | STR(s) #s |
Stringify a macro argument. | |
#define | FILENAME_MAX_LENGTH 80 |
Maximum length of filenames that are generated by our programs. More... | |
#define | FILENAME_ID_LENGTH 42 |
Maximum length of id taken from fasta header for filename generation. More... | |
#define | VRNA_TRIM_LEADING 1U |
Trim only characters leading the string. More... | |
#define | VRNA_TRIM_TRAILING 2U |
Trim only characters trailing the string. More... | |
#define | VRNA_TRIM_IN_BETWEEN 4U |
Trim only characters within the string. More... | |
#define | VRNA_TRIM_SUBST_BY_FIRST 8U |
Replace remaining characters after trimming with the first delimiter in list. More... | |
#define | VRNA_TRIM_DEFAULT ( VRNA_TRIM_LEADING | VRNA_TRIM_TRAILING ) |
Default settings for trimming, i.e. trim leading and trailing. More... | |
#define | VRNA_TRIM_ALL ( VRNA_TRIM_DEFAULT | VRNA_TRIM_IN_BETWEEN ) |
Trim characters anywhere in the string. More... | |
Functions | |
char * | vrna_strdup_printf (const char *format,...) |
Safely create a formatted string. More... | |
char * | vrna_strdup_vprintf (const char *format, va_list argp) |
Safely create a formatted string. More... | |
int | vrna_strcat_printf (char **dest, const char *format,...) |
Safely append a formatted string to another string. More... | |
int | vrna_strcat_vprintf (char **dest, const char *format, va_list args) |
Safely append a formatted string to another string. More... | |
unsigned int | vrna_strtrim (char *string, const char *delimiters, unsigned int keep, unsigned int options) |
Trim a string by removing (multiple) occurrences of a particular character. More... | |
char ** | vrna_strsplit (const char *string, const char *delimiter) |
Split a string into tokens using a delimiting character. More... | |
char * | vrna_random_string (int l, const char symbols[]) |
Create a random string using characters from a specified symbol set. More... | |
int | vrna_hamming_distance (const char *s1, const char *s2) |
Calculate hamming distance between two sequences. More... | |
int | vrna_hamming_distance_bound (const char *s1, const char *s2, int n) |
Calculate hamming distance between two sequences up to a specified length. More... | |
void | vrna_seq_toRNA (char *sequence) |
Convert an input sequence (possibly containing DNA alphabet characters) to RNA alphabet. More... | |
void | vrna_seq_toupper (char *sequence) |
Convert an input sequence to uppercase. More... | |
void | vrna_seq_reverse (char *sequence) |
Reverse a string in-place. More... | |
char * | vrna_DNA_complement (const char *sequence) |
Retrieve a DNA sequence which resembles the complement of the input sequence. More... | |
char * | vrna_seq_ungapped (const char *sequence) |
Remove gap characters from a nucleotide sequence. More... | |
char * | vrna_cut_point_insert (const char *string, int cp) |
Add a separating '&' character into a string according to cut-point position. More... | |
char * | vrna_cut_point_remove (const char *string, int *cp) |
Remove a separating '&' character from a string. More... | |
#define FILENAME_MAX_LENGTH 80 |
#include <ViennaRNA/utils/strings.h>
Maximum length of filenames that are generated by our programs.
This definition should be used throughout the complete ViennaRNA package wherever a static array holding filenames of output files is declared.
#define FILENAME_ID_LENGTH 42 |
#include <ViennaRNA/utils/strings.h>
Maximum length of id taken from fasta header for filename generation.
This has to be smaller than FILENAME_MAX_LENGTH since, in most cases, some suffix will be appended to the ID.
#define VRNA_TRIM_LEADING 1U |
#include <ViennaRNA/utils/strings.h>
Trim only characters leading the string.
#define VRNA_TRIM_TRAILING 2U |
#include <ViennaRNA/utils/strings.h>
Trim only characters trailing the string.
#define VRNA_TRIM_IN_BETWEEN 4U |
#define VRNA_TRIM_SUBST_BY_FIRST 8U |
#include <ViennaRNA/utils/strings.h>
Replace remaining characters after trimming with the first delimiter in list.
#define VRNA_TRIM_DEFAULT ( VRNA_TRIM_LEADING | VRNA_TRIM_TRAILING ) |
#include <ViennaRNA/utils/strings.h>
Default settings for trimming, i.e. trim leading and trailing.
#define VRNA_TRIM_ALL ( VRNA_TRIM_DEFAULT | VRNA_TRIM_IN_BETWEEN ) |
char * vrna_strdup_printf | ( | const char * | format, |
... | |||
) |
#include <ViennaRNA/utils/strings.h>
Safely create a formatted string.
This function is a safe implementation for creating a formatted character array, similar to sprintf. Internally, it uses the asprintf function if available to dynamically allocate a large enough character array to store the supplied content. If asprintf is not available, it mimics its behavior using vsnprintf.
format | The format string (See also asprintf) |
... | The list of variables used to fill the format string |
char * vrna_strdup_vprintf | ( | const char * | format, |
va_list | argp | ||
) |
#include <ViennaRNA/utils/strings.h>
Safely create a formatted string.
This function is the va_list version of vrna_strdup_printf()
format | The format string (See also asprintf) |
argp | The list of arguments to fill the format string |
int vrna_strcat_printf | ( | char ** | dest, |
const char * | format, | ||
... | |||
) |
#include <ViennaRNA/utils/strings.h>
Safely append a formatted string to another string.
This function is a safe implementation for appending a formatted character array, similar to a combination of strcat and sprintf. The function automatically allocates enough memory to store both the previous content stored at dest
and the appended format string. If the dest
pointer is NULL, the function allocates memory only for the format string. The function returns the number of characters in the resulting string or -1 in case of an error.
dest | The address of a char *pointer where the formatted string is to be appended |
format | The format string (See also sprintf) |
... | The list of variables used to fill the format string |
int vrna_strcat_vprintf | ( | char ** | dest, |
const char * | format, | ||
va_list | args | ||
) |
#include <ViennaRNA/utils/strings.h>
Safely append a formatted string to another string.
This function is the va_list version of vrna_strcat_printf()
dest | The address of a char *pointer where the formatted string is to be appended |
format | The format string (See also sprintf) |
args | The list of arguments to fill the format string |
unsigned int vrna_strtrim | ( | char * | string, |
const char * | delimiters, | ||
unsigned int | keep, | ||
unsigned int | options | ||
) |
#include <ViennaRNA/utils/strings.h>
Trim a string by removing (multiple) occurrences of a particular character.
This function removes (multiple) consecutive occurrences of a set of characters (delimiters
) within an input string. It may be used to remove leading and/or trailing whitespaces or to restrict the maximum number of consecutive occurrences of the delimiting characters delimiters
. Setting keep=0
removes all occurrences, while other values reduce multiple consecutive occurrences to at most keep
delimiters. This might be useful if one would like to reduce multiple whitespaces to a single one, or to remove empty fields within a comma-separated value string.
The parameter delimiters
may be a pointer to a 0-terminated char string containing any set of ASCII characters. If NULL or an empty char string is passed as delimiter set, all whitespace characters are trimmed. The options
parameter is a bit vector that specifies which part of the string should undergo trimming. The implementation distinguishes the leading (VRNA_TRIM_LEADING), trailing (VRNA_TRIM_TRAILING), and in-between (VRNA_TRIM_IN_BETWEEN) part with respect to the delimiter set. Combinations of these parts can be specified by using the bitwise OR operator.
The following example code removes all leading and trailing whitespace characters from the input string:
keep
parameter, the first keep
delimiters are preserved within the string. Use VRNA_TRIM_SUBST_BY_FIRST to substitute all remaining delimiting characters with the first from the delimiters
list.
string | The '\0'-terminated input string to trim |
delimiters | The delimiter characters as 0-terminated char array (or NULL) |
keep | The maximum number of consecutive occurrences of the delimiter in the output string |
options | The option bit vector specifying the mode of operation |
Since many scripting languages treat strings as immutable objects, this function does not modify the input string directly. Instead, it returns the modified string as second return value, together with the number of removed delimiters.
The scripting language interface provides an overloaded version of this function, with default parameters delimiters=NULL
, keep=0
, and options=VRNA_TRIM_DEFAULT
.
char ** vrna_strsplit | ( | const char * | string, |
const char * | delimiter | ||
) |
#include <ViennaRNA/utils/strings.h>
Split a string into tokens using a delimiting character.
This function splits a string into an array of strings using a single character that delimits the elements within the string. The default delimiter is the ampersand '&'
and will be used when NULL
is passed as a second argument. The returned list is NULL terminated, i.e. the last element is NULL
. If the delimiter is not found, the returned list contains exactly one element: the input string.
For instance, the following code:
produces this output:
* GGGG * CCCC * AAAAA *
and properly frees the memory occupied by the returned element array.
string | The input string that should be split into elements |
delimiter | The delimiting character. If NULL , the delimiter is "&" |
NULL
terminated list of the elements in the string
char * vrna_random_string | ( | int | l, |
const char | symbols[] | ||
) |
#include <ViennaRNA/utils/strings.h>
Create a random string using characters from a specified symbol set.
l | The length of the sequence |
symbols | The symbol set |
int vrna_hamming_distance | ( | const char * | s1, |
const char * | s2 | ||
) |
#include <ViennaRNA/utils/strings.h>
Calculate hamming distance between two sequences.
s1 | The first sequence |
s2 | The second sequence |
int vrna_hamming_distance_bound | ( | const char * | s1, |
const char * | s2, | ||
int | n | ||
) |
#include <ViennaRNA/utils/strings.h>
Calculate hamming distance between two sequences up to a specified length.
This function is similar to vrna_hamming_distance() but instead of comparing both sequences up to their actual length, only the first 'n' characters are taken into account.
s1 | The first sequence |
s2 | The second sequence |
n | The length of the subsequences to consider (starting from the 5' end) |
void vrna_seq_toRNA | ( | char * | sequence | ) |
#include <ViennaRNA/utils/strings.h>
Convert an input sequence (possibly containing DNA alphabet characters) to RNA alphabet.
This function substitutes T and t with U and u, respectively.
sequence | The sequence to be converted |
void vrna_seq_toupper | ( | char * | sequence | ) |
#include <ViennaRNA/utils/strings.h>
Convert an input sequence to uppercase.
sequence | The sequence to be converted |
void vrna_seq_reverse | ( | char * | sequence | ) |
#include <ViennaRNA/utils/strings.h>
Reverse a string in-place.
This function reverses a character string in the form of an array of characters in-place, i.e. it changes the input parameter.
sequence
consists of the reverse string prior to the execution.
sequence | The string to reverse |
char * vrna_DNA_complement | ( | const char * | sequence | ) |
#include <ViennaRNA/utils/strings.h>
Retrieve a DNA sequence which resembles the complement of the input sequence.
This function returns a new DNA string which is the complement of the input, i.e. the nucleotide letters A
,C
,G
, and T
are substituted by their complements T
,G
,C
, and A
, respectively.
Any characters not belonging to the alphabet of the 4 canonical bases of DNA are not altered.
U
of the RNA alphabet equally to T
sequence | the input DNA sequence |
char * vrna_seq_ungapped | ( | const char * | sequence | ) |
#include <ViennaRNA/utils/strings.h>
Remove gap characters from a nucleotide sequence.
sequence | The original, null-terminated nucleotide sequence |
char * vrna_cut_point_insert | ( | const char * | string, |
int | cp | ||
) |
#include <ViennaRNA/utils/strings.h>
Add a separating '&' character into a string according to cut-point position.
If the cut-point position is less than or equal to zero, this function just returns a copy of the provided string. Otherwise, the cut-point character is set at the corresponding position.
string | The original string |
cp | The cut-point position |
char * vrna_cut_point_remove | ( | const char * | string, |
int * | cp | ||
) |
#include <ViennaRNA/utils/strings.h>
Remove a separating '&' character from a string.
This function removes the cut-point indicating '&' character from a string and memorizes its position in a provided integer variable. If no '&' is found in the input, the integer variable is set to -1. The function returns a copy of the input string with the '&' being sliced out.
string | The original string |
cp | The cut-point position |