Simple utilities sink - stuff that doesn't fit anywhere else / objects in plain C Snapshot
strtk.h
Go to the documentation of this file.
00001 #ifndef __STRTK_X_Y_H__
00002 #define __STRTK_X_Y_H__
00003 
00004 #include <cutils/base.h>
00005 
00006 #define STRTK_PATTERN_SIZE ( 256 / 8 )
00007 
00008 
00009 /**
00010  * @defgroup STRTK
00011  *
00012  * @brief defines a character class. holds a bitmap of characters, that is later used for tokenization.
00013  *
00014  * Functions like strtok and strpbrk build a bitmap of characters each time they are called.
00015  * While parsing the argument character string,
00016  * they use this bitmap to check whether a character is
00017  * part of the character set or not.
00018  *
00019  * The class STRTK holds the bitmap of the character set, so that creation
00020  * of this bitmap can be done in a separate step from its usage; therefore the parsing
00021  * functions do not have to create the character bitmap over and over again.
00022  *
00023  * I don't know if this is very useful; a character class bitmap takes 32 bytes, which is two cache lines.
00024  * So two cache lines have to be moved in order to use this stuff.
00025  *
00026  * @{
00027  */
00028 
00029 typedef struct tagSTRTK {
00030   uint8_t pattern[ STRTK_PATTERN_SIZE ];
00031 
00032 } STRTK;
00033 
00034 
00035 /**
00036  * @brief initalise a character class
00037  */
00038 void STRTK_init( STRTK *tok , const char *stop_chars );
00039 
00040 M_INLINE void STRTK_add_char( STRTK *tok, uint8_t ch )
00041 {
00042   tok->pattern[ ch >> 3 ] |= (1 << (ch & 7));
00043 }
00044 
/**
 * @brief macro checks if character is part of character class
 *
 * Evaluates to 1 if the bit of character ch is set in the bitmap of tok, 0 otherwise.
 * The bitmap byte is combined with the bit mask by bitwise AND; a logical AND would
 * report every character whose bitmap byte has any bit set.
 *
 * ch is converted to unsigned char before it is used as index, so that a negative
 * plain char value can not address memory in front of the bitmap.
 * Note: ch is evaluated twice, do not pass an expression with side effects.
 */
#define STRTK_IS_CHAR( tok, ch ) \
   ( ( (tok)->pattern[ ((unsigned char) (ch)) >> 3 ] & ( 1 << ( ((unsigned char) (ch)) & 7 ) ) ) != 0 )
00050 
00051 /**
00052  * @brief Returns token where all characters belong to argument character class.
00053  * @returns the pointer to the next character that is not part of the character class.
00054  */
00055 M_INLINE char * STRTK_spn(STRTK *tok, const char *hay)
00056 {
00057    uint8_t *cpos;
00058 
00059    for( cpos = (uint8_t *) hay; *cpos != '\0'; ++cpos ) {
00060       if (!STRTK_IS_CHAR( tok, *cpos )) {
00061          break;
00062       }
00063    }
00064    return (char *) cpos;
00065 }
00066 
00067 /**
00068  * @brief Returns token where all characters belong to argument character class, argument string is limited.
00069  * @returns the pointer to the next character that is not part of the character class.
00070  */
00071 M_INLINE char * STRTK_nspn(STRTK *tok, const char *hay, size_t nsize )
00072 {
00073    uint8_t *cpos;
00074 
00075    for( cpos = (uint8_t *) hay; *cpos != '\0' && nsize > 0 ; ++cpos, --nsize ) {
00076       if (!STRTK_IS_CHAR( tok, *cpos )) {
00077          break;
00078       }
00079    }
00080    return (char *) cpos;
00081 }
00082 
00083 
00084 /** 
00085  * @brief tokenize a string . the character class is interpreted as set of white spaces.
00086  * @return pointer to the start of token, end_tok returns position right after the token.
00087  */
00088 M_INLINE char * STRTK_ntok(STRTK *tok, const char *hay, size_t nsize, char **end_tok )
00089 {
00090    uint8_t *cpos;
00091    char *start_tok;
00092 
00093    // skip leading whitespaces.
00094    for( cpos = (uint8_t *) hay; *cpos != '\0' && nsize > 0 && STRTK_IS_CHAR( tok, *cpos ) ; ++cpos, --nsize );
00095  
00096    start_tok = (char *) cpos;
00097 
00098    for( ; *cpos != '\0' && nsize > 0 && !STRTK_IS_CHAR( tok, *cpos ) ; ++cpos, --nsize );
00099 
00100    *end_tok = (char *) cpos;
00101 
00102    if (start_tok == (char *) cpos) {
00103      return 0;
00104    }
00105    return start_tok;
00106 }
00107 
00108 /** 
00109  * @brief tokenize a string . the character class is interpreted as set of white spaces.
00110  * @return pointer to the start of token, end_tok returns position right after the token.
00111  */
00112 M_INLINE char * STRTK_tok(STRTK *tok, const char *hay, char **end_tok )
00113 {
00114    uint8_t *cpos;
00115    char *start_tok;
00116 
00117    // skip leading whitespaces.
00118    for( cpos = (uint8_t *) hay; *cpos != '\0' && STRTK_IS_CHAR( tok, *cpos ) ; ++cpos);
00119  
00120    start_tok = (char *) cpos;
00121 
00122    for( ; *cpos != '\0' && !STRTK_IS_CHAR( tok, *cpos )  ; ++cpos);
00123 
00124    *end_tok = (char *) cpos;
00125 
00126    if (start_tok == (char *) cpos) {
00127      return 0;
00128    }
00129    return start_tok;
00130 }
00131 
00132 /**
00133  * @}
00134  */
00135 
00136 
00137 #endif
00138