Simple utilities sink - stuff that doesn't fit anywhere else / objects in plain C Snapshot
Classes | Defines | Typedefs | Functions
STRTK

defines a character class. holds a bitmap of characters, that is later used for tokenization. More...

Classes

struct  tagSTRTK

Defines

/* Tests the single bit of ch in the bitmap: byte index is ch >> 3, bit index is ch & 7. Bitwise '&' is required here; a logical '&&' would only test whether the whole byte is non-zero. Evaluates ch twice. */
#define STRTK_IS_CHAR(tok, ch)   ( (tok)->pattern[ (ch) >> 3 ] & (1 << ((ch) & 7)) )
 macro checks if character is part of character class

Typedefs

typedef struct tagSTRTK STRTK

Functions

void STRTK_init (STRTK *tok, const char *stop_chars)
 initialise a character class
M_INLINE void STRTK_add_char (STRTK *tok, uint8_t ch)
M_INLINE char * STRTK_spn (STRTK *tok, const char *hay)
 Returns token where all characters belong to argument character class.
M_INLINE char * STRTK_nspn (STRTK *tok, const char *hay, size_t nsize)
 Returns token where all characters belong to argument character class, argument string is limited.
M_INLINE char * STRTK_ntok (STRTK *tok, const char *hay, size_t nsize, char **end_tok)
 Tokenizes a length-limited string; the character class is interpreted as the set of whitespace (separator) characters.
M_INLINE char * STRTK_tok (STRTK *tok, const char *hay, char **end_tok)
 Tokenizes a string; the character class is interpreted as the set of whitespace (separator) characters.

Detailed Description

defines a character class. holds a bitmap of characters, that is later used for tokenization.

Functions like strtok and strpbrk build a bitmap of characters each time they are called. While parsing the argument string, they consult this bitmap to check whether a character is part of the character set.

The class STRTK holds the bitmap holding the character set, so that creation of this bitmap can be done in separate step from usage, therefore parsing function will not create character bitmap over and over again.

I don't know if this is very useful: a character class bitmap takes 32 bytes, which is two cache lines, so two cache lines have to be moved in order to use it.


Define Documentation

#define STRTK_IS_CHAR (   tok,
  ch 
)    ( (tok)->pattern[ (ch) >> 3 ] & (1 << ((ch) & 7)) )

macro checks if character is part of character class

Definition at line 48 of file strtk.h.


Typedef Documentation

typedef struct tagSTRTK STRTK

Function Documentation

/**
 * Adds one character to a character class by setting its bit in the bitmap.
 *
 * The byte within tok->pattern is selected by ch >> 3, the bit within that
 * byte by ch & 7.
 *
 * Definition at line 40 of file strtk.h.
 *
 * @param tok  character class to modify.
 * @param ch   character to add.
 */
M_INLINE void STRTK_add_char( STRTK *tok, uint8_t ch )
{
  const int slot = ch >> 3;
  const int mask = 1 << (ch & 7);

  tok->pattern[ slot ] |= mask;
}
/**
 * Initialise a character class.
 *
 * Clears STRTK_PATTERN_SIZE bytes of the bitmap, then adds every character
 * of stop_chars up to (not including) the terminating '\0'.
 *
 * Definition at line 4 of file strtk.c.
 *
 * @param tok         character class to initialise.
 * @param stop_chars  NUL-terminated string listing the members of the class.
 */
void STRTK_init( STRTK *tok, const char *stop_chars )
{
  /* walk the string as unsigned bytes so each value can be used as a bitmap index */
  const uint8_t *cur = (const uint8_t *) stop_chars;

  memset( tok->pattern, 0, STRTK_PATTERN_SIZE );

  while (*cur != '\0') {
    STRTK_add_char( tok, *cur );
    ++cur;
  }
}
/**
 * Skips the leading run of characters for which STRTK_IS_CHAR reports
 * membership in the character class; at most nsize characters are examined.
 *
 * Scanning stops at the first character that is not in the class, at the
 * terminating '\0', or after nsize characters, whichever comes first.
 *
 * Definition at line 71 of file strtk.h.
 *
 * @param tok    character class.
 * @param hay    string to scan; it does not have to be NUL-terminated within
 *               the first nsize bytes.
 * @param nsize  maximum number of characters to examine.
 * @return the pointer to the next character that is not part of the
 *         character class (or hay + nsize when the limit was reached).
 */
M_INLINE char *STRTK_nspn( STRTK *tok, const char *hay, size_t nsize )
{
   const uint8_t *cpos = (const uint8_t *) hay;

   /* the limit is tested before *cpos is read, so no byte at or past hay + nsize is accessed */
   while (nsize > 0 && *cpos != '\0' && STRTK_IS_CHAR( tok, *cpos )) {
      ++cpos;
      --nsize;
   }
   return (char *) cpos;
}
/**
 * Tokenizes a length-limited string; the character class is interpreted as
 * the set of whitespace (separator) characters.
 *
 * Leading characters of the class are skipped first, then the token extends
 * up to the next character of the class, the terminating '\0', or the end of
 * the nsize limit, whichever comes first.
 *
 * Definition at line 88 of file strtk.h.
 *
 * @param tok      character class holding the separators.
 * @param hay      string to scan; it does not have to be NUL-terminated within
 *                 the first nsize bytes.
 * @param nsize    maximum number of characters to examine.
 * @param end_tok  always receives the position right after the token.
 * @return pointer to the start of the token, or 0 if the token is empty.
 */
M_INLINE char *STRTK_ntok( STRTK *tok, const char *hay, size_t nsize, char **end_tok )
{
   const uint8_t *cpos = (const uint8_t *) hay;
   char *start_tok;

   /* In both loops the limit is tested before *cpos is read, so no byte at or past hay + nsize is accessed. */

   /* skip leading whitespaces. */
   while (nsize > 0 && *cpos != '\0' && STRTK_IS_CHAR( tok, *cpos )) {
      ++cpos;
      --nsize;
   }

   start_tok = (char *) cpos;

   /* consume the token itself. */
   while (nsize > 0 && *cpos != '\0' && !STRTK_IS_CHAR( tok, *cpos )) {
      ++cpos;
      --nsize;
   }

   *end_tok = (char *) cpos;

   if (start_tok == (char *) cpos) {
     return 0;
   }
   return start_tok;
}
/**
 * Skips the leading run of characters for which STRTK_IS_CHAR reports
 * membership in the character class.
 *
 * Definition at line 55 of file strtk.h.
 *
 * @param tok  character class.
 * @param hay  NUL-terminated string to scan.
 * @return the pointer to the next character that is not part of the
 *         character class (possibly the terminating '\0').
 */
M_INLINE char *STRTK_spn( STRTK *tok, const char *hay )
{
   const uint8_t *scan = (const uint8_t *) hay;

   while (*scan != '\0' && STRTK_IS_CHAR( tok, *scan )) {
      ++scan;
   }
   return (char *) scan;
}
/**
 * Tokenizes a string; the character class is interpreted as the set of
 * whitespace (separator) characters.
 *
 * Leading characters of the class are skipped, then the token runs up to the
 * next character of the class or the terminating '\0'.
 *
 * Definition at line 112 of file strtk.h.
 *
 * @param tok      character class holding the separators.
 * @param hay      NUL-terminated string to scan.
 * @param end_tok  always receives the position right after the token.
 * @return pointer to the start of the token, or 0 if the token is empty.
 */
M_INLINE char *STRTK_tok( STRTK *tok, const char *hay, char **end_tok )
{
   const uint8_t *scan = (const uint8_t *) hay;
   const uint8_t *first;

   /* step over leading separators */
   while (*scan != '\0' && STRTK_IS_CHAR( tok, *scan )) {
      ++scan;
   }
   first = scan;

   /* advance to the end of the token */
   while (*scan != '\0' && !STRTK_IS_CHAR( tok, *scan )) {
      ++scan;
   }
   *end_tok = (char *) scan;

   if (first != scan) {
      return (char *) first;
   }
   return 0;
}