// xstring.cpp -- fceux source listing (extracted from the Doxygen-generated documentation)
/* Extended string routines
 *
 * Copyright notice for this file:
 *  Copyright (C) 2004 Jason Oster (Parasyte)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/// \file
/// \brief various string manipulation utilities

#include "xstring.h"
#include <cstring>
#include <string>

///Upper case routine.
///
///Converts every ASCII lower-case letter ('a'..'z') of str to upper case, in place.
///All other bytes are left untouched.
///\param str NUL-terminated string to modify
///\return number of characters modified
int str_ucase(char *str) {
      int changed = 0;

      // Walk to the terminator once. Re-running strlen() on every iteration,
      // as this routine used to do, is quadratic in the string length.
      for (char *p = str; *p; ++p) {
            if ((*p >= 'a') && (*p <= 'z')) {
                  *p &= ~0x20; // clearing bit 5 maps 'a'..'z' onto 'A'..'Z'
                  changed++;
            }
      }
      return changed;
}


///Lower case routine.
///
///Converts every ASCII upper-case letter ('A'..'Z') of str to lower case, in place.
///All other bytes are left untouched.
///\param str NUL-terminated string to modify
///\return number of characters modified
int str_lcase(char *str) {
      int changed = 0;

      // Single pass up to the terminator; no strlen() call per iteration.
      for (char *p = str; *p; ++p) {
            if ((*p >= 'A') && (*p <= 'Z')) {
                  *p |= 0x20; // setting bit 5 maps 'A'..'Z' onto 'a'..'z'
                  changed++;
            }
      }
      return changed;
}


///White space-trimming routine

///Removes whitespace from left side of string, depending on the flags set (See STRIP_x definitions in xstring.h)
///Returns number of characters removed
00061 int str_ltrim(char *str, int flags) {
      unsigned int i=0; //mbg merge 7/17/06 changed to unsigned int

      while (str[0]) {
            if ((str[0] != ' ') || (str[0] != '\t') || (str[0] != '\r') || (str[0] != '\n')) break;

            if ((flags & STRIP_SP) && (str[0] == ' ')) {
                  i++;
                  strcpy(str,str+1);
            }
            if ((flags & STRIP_TAB) && (str[0] == '\t')) {
                  i++;
                  strcpy(str,str+1);
            }
            if ((flags & STRIP_CR) && (str[0] == '\r')) {
                  i++;
                  strcpy(str,str+1);
            }
            if ((flags & STRIP_LF) && (str[0] == '\n')) {
                  i++;
                  strcpy(str,str+1);
            }
      }
      return i;
}


///White space-trimming routine

///Removes whitespace from right side of string, depending on the flags set (See STRIP_x definitions in xstring.h)
///Returns number of characters removed
00092 int str_rtrim(char *str, int flags) {
      unsigned int i=0; //mbg merge 7/17/06 changed to unsigned int

      while (strlen(str)) {
            if ((str[strlen(str)-1] != ' ') ||
                  (str[strlen(str)-1] != '\t') ||
                  (str[strlen(str)-1] != '\r') ||
                  (str[strlen(str)-1] != '\n')) break;

            if ((flags & STRIP_SP) && (str[0] == ' ')) {
                  i++;
                  str[strlen(str)-1] = 0;
            }
            if ((flags & STRIP_TAB) && (str[0] == '\t')) {
                  i++;
                  str[strlen(str)-1] = 0;
            }
            if ((flags & STRIP_CR) && (str[0] == '\r')) {
                  i++;
                  str[strlen(str)-1] = 0;
            }
            if ((flags & STRIP_LF) && (str[0] == '\n')) {
                  i++;
                  str[strlen(str)-1] = 0;
            }
      }
      return i;
}


///White space-stripping routine

///Removes whitespace depending on the flags set (See STRIP_x definitions in xstring.h)
///Returns number of characters removed, or -1 on error
00126 int str_strip(char *str, int flags) {
      unsigned int i=0,j=0; //mbg merge 7/17/06 changed to unsigned int
      char *astr,chr;

      if (!strlen(str)) return -1;
      if (!(flags & (STRIP_SP|STRIP_TAB|STRIP_CR|STRIP_LF))) return -1;
      if (!(astr = (char*)malloc(strlen(str)+1))) return -1;
      while (i < strlen(str)) {
            chr = str[i++];
            if ((flags & STRIP_SP) && (chr == ' ')) chr = 0;
            if ((flags & STRIP_TAB) && (chr == '\t')) chr = 0;
            if ((flags & STRIP_CR) && (chr == '\r')) chr = 0;
            if ((flags & STRIP_LF) && (chr == '\n')) chr = 0;

            if (chr) astr[j++] = chr;
      }
      astr[j] = 0;
      strcpy(str,astr);
      free(astr);
      return j;
}


///Character replacement routine
///
///Replaces all instances of 'search' with 'replace', in place.
///\param str NUL-terminated string to modify
///\param search character to look for (searching for '\0' never matches)
///\param replace character to write instead
///\return number of characters modified
///\note If replace is '\0' the string is cut at the first match and the scan
///      stops there, so at most one character is modified in that case.
int chr_replace(char *str, char search, char replace) {
      int changed = 0;

      // Single pass to the terminator instead of calling strlen() per iteration.
      for (char *p = str; *p; ++p) {
            if (*p != search) continue;
            *p = replace;
            changed++;
            // Writing a NUL shortens the string; everything behind it is no
            // longer part of the string and must not be scanned.
            if (!replace) break;
      }
      return changed;
}


///Sub-String replacement routine
///
///Replaces all (non-overlapping, left-to-right) instances of 'search' with
///'replace', in place.
///\param str NUL-terminated string to modify. When 'replace' is longer than
///       'search' the result is longer than the input: the caller must make sure
///       the buffer behind str is large enough, this routine cannot check it.
///\param search sub-string to look for; must not be empty
///\param replace replacement text; may be empty
///\return length of the resulting string, or -1 on error (empty str, empty
///        search, or allocation failure).
///\note This routine used to be documented as returning the number of sub-strings
///      modified, but it has always returned the resulting length; that value is
///      preserved so existing callers see no change.
int str_replace(char *str, char *search, char *replace) {
      const size_t srclen = strlen(str);
      const size_t searchlen = strlen(search);
      const size_t replacelen = strlen(replace);

      if ((!srclen) || (!searchlen)) return -1; //note: allow *replace to have a length of zero!

      // Pass 1: count the matches so the scratch buffer can be sized for the
      // RESULT. Sizing it from the input overflowed the heap whenever the
      // replacement was longer than the search string.
      size_t matches = 0;
      for (size_t i = 0; i < srclen; ) {
            if (!strncmp(str+i,search,searchlen)) {
                  matches++;
                  i += searchlen;
            }
            else i++;
      }
      if (!matches) return (int)srclen; // nothing to do, string is unchanged

      const size_t outlen = srclen - matches*searchlen + matches*replacelen;
      char *astr = (char*)malloc(outlen+1);
      if (!astr) return -1;

      // Pass 2: build the result.
      size_t i = 0, j = 0;
      while (i < srclen) {
            if (!strncmp(str+i,search,searchlen)) {
                  if (replacelen) memcpy(astr+j,replace,replacelen);
                  i += searchlen;
                  j += replacelen;
            }
            else astr[j++] = str[i++];
      }
      astr[j] = 0;
      memcpy(str,astr,j+1);
      free(astr);
      return (int)j;
}

/// Two base64 lookup tables packed into one 256-entry array:
///  - slots 0..63 map a 6-bit value to its base64 ASCII character;
///  - slot (c ^ 0x80) maps the 7-bit ASCII character c back to its 6-bit value.
/// Every slot that is not assigned holds 0xFF ("invalid"). The padding character
/// '=' decodes to 0.
static const struct Base64Table
{
      Base64Table()
      {
            static const char alphabet[] =
                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"   // 0..25
                  "abcdefghijklmnopqrstuvwxyz"   // 26..51
                  "0123456789"                   // 52..61
                  "+/";                          // 62, 63

            // flag every slot as invalid first
            for(size_t slot=0; slot<sizeof(data); ++slot)
                  data[slot] = 0xFF;

            for(size_t value=0; value<64; ++value)
            {
                  const unsigned char ascii = (unsigned char)alphabet[value];
                  data[value] = ascii;                        // value->ascii
                  // ascii->value lives in the high-bit half so it cannot
                  // collide with the value->ascii entries above
                  data[ascii ^ 0x80] = (unsigned char)value;
            }
            data[((unsigned char)'=') ^ 0x80] = 0;
      }
      /// Raw access to the combined table; pos must be below 256.
      unsigned char operator[] (size_t pos) const { return data[pos]; }
private:
      unsigned char data[256];
} Base64Table;

///Converts the provided data to a string in a standard, user-friendly, round-trippable format
00217 std::string BytesToString(const void* data, int len)
{
      char temp[16];
      if(len==1) {
            sprintf(temp,"%d",*(const unsigned char*)data);
            return temp;
      } else if(len==2) {
            sprintf(temp,"%d",*(const unsigned short*)data);
            return temp;
      } else if(len==4) {
            sprintf(temp,"%d",*(const unsigned int*)data);
            return temp;            
      }
      
      std::string ret;
      if(1) // use base64
      {
            const unsigned char* src = (const unsigned char*)data;
            ret = "base64:";
            for(int n; len > 0; len -= n)
            {
                  unsigned char input[3] = {0,0,0};
                  for(n=0; n<3 && n<len; ++n)
                        input[n] = *src++;
                  unsigned char output[4] =
                  {
                        Base64Table[ input[0] >> 2 ],
                        Base64Table[ ((input[0] & 0x03) << 4) | (input[1] >> 4) ],
                        n<2 ? '=' : Base64Table[ ((input[1] & 0x0F) << 2) | (input[2] >> 6) ],
                        n<3 ? '=' : Base64Table[ input[2] & 0x3F ]
                  };
                  ret.append(output, output+4);
            }
      }
      else // use hex
      {
            ret.resize(len*2+2);
            ret[0] = '0';
            ret[1] = 'x';
            for(int i=0;i<len;i++)
            {
                  int a = (((const unsigned char*)data)[i]>>4);
                  int b = (((const unsigned char*)data)[i])&15;
                  if(a>9) a += 'A'-10;
                  else a += '0';
                  if(b>9) b += 'A'-10;
                  else b += '0';
                  ret[2+i*2] = a;
                  ret[2+i*2+1] = b;
            }
      }
      return ret;
}

///Number of data bytes encoded by a "0x..." hex string.
///
///A hex string starts with "0x" or "0X" and is longer than the prefix alone.
///Each pair of digits after the prefix is one byte; a trailing unpaired digit
///is not counted.
///\return the byte count, or -1 if this is not a hex string
int HexStringToBytesLength(const std::string& str)
{
      // Compared directly against both spellings instead of going through
      // toupper(): passing a plain (possibly negative) char to toupper() is
      // undefined, and the result would depend on the locale.
      const bool hasPrefix = str.size() > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X');
      if(!hasPrefix) return -1;
      return (int)(str.size()/2 - 1); // the -1 discounts the two prefix characters
}

///Number of data bytes encoded by a "base64:..." string.
///
///The string must start with "base64:" and the payload after the prefix must be a
///multiple of four characters long. Each group of four characters stands for three
///bytes, minus one byte per trailing '=' padding character (at most two are looked at).
///\return the byte count, or -1 if the string does not have that shape
int Base64StringToBytesLength(const std::string& str)
{
      const size_t prefixLen = 7; // strlen("base64:")
      const size_t total = str.size();

      if(total < prefixLen) return -1;
      if((total - prefixLen) % 4 != 0) return -1;
      if(str.compare(0, prefixLen, "base64:") != 0) return -1;

      size_t bytes = (total - prefixLen) / 4 * 3;
      if(str[total-1] == '=')
      {
            --bytes;
            if(str[total-2] == '=')
                  --bytes;
      }
      return bytes;
}

///parses a string in the same format as BytesToString
///returns true if success.
00291 bool StringToBytes(const std::string& str, void* data, int len)
{
      if(str.substr(0,7) == "base64:")
      {
            // base64
            unsigned char* tgt = (unsigned char*)data;
            for(size_t pos = 7; pos < str.size() && len > 0; )
            {
                  unsigned char input[4], converted[4];
                  for(int i=0; i<4; ++i)
                  {
                        if(pos >= str.size() && i > 0) return false; // invalid data
                        input[i]     = str[pos++];
                        if(input[i] & 0x80) return false;     // illegal character
                        converted[i] = Base64Table[input[i]^0x80];
                        if(converted[i] & 0x80) return false; // illegal character
                  }
                  unsigned char outpacket[3] =
                  {
                        (converted[0] << 2) | (converted[1] >> 4),
                        (converted[1] << 4) | (converted[2] >> 2),
                        (converted[2] << 6) | (converted[3])
                  };
                  int outlen = (input[2] == '=') ? 1 : (input[3] == '=' ? 2 : 3);
                  if(outlen > len) outlen = len;
                  memcpy(tgt, outpacket, outlen);
                  tgt += outlen;
                  len -= outlen;
            }
            return true;
      }
      if(str.size()>2 && str[0] == '0' && toupper(str[1]) == 'X')
      {
            // hex
            int amt = len;
            int bytesAvailable = str.size()/2;
            if(bytesAvailable < amt)
                  amt = bytesAvailable;
            const char* cstr = str.c_str()+2;
            for(int i=0;i<amt;i++) {
                  char a = toupper(cstr[i*2]);
                  char b = toupper(cstr[i*2+1]);
                  if(a>='A') a=a-'A'+10;
                  else a-='0';
                  if(b>='A') b=b-'A'+10;
                  else b-='0';
                  unsigned char val = ((unsigned char)a<<4)|(unsigned char)b; 
                  ((unsigned char*)data)[i] = val;
            }
            return true;
      }
      
      if(len==1) {
            int x = atoi(str.c_str());
            *(unsigned char*)data = x;
            return true;
      } else if(len==2) {
            int x = atoi(str.c_str());
            *(unsigned short*)data = x;
            return true;
      } else if(len==4) {
            int x = atoi(str.c_str());
            *(unsigned int*)data = x;
            return true;
      }
      //we can't handle it
      return false;
}

#include <string>
#include <vector>
/// \brief convert input string into vector of string tokens
///
/// \note consecutive delimiters will be treated as single delimiter
/// \note delimiters are _not_ included in return data
/// \note leading and trailing delimiters do not produce empty tokens
///
/// \param str string to be parsed
/// \param delims list of delimiters; every character in it separates tokens
/// \return the tokens in order of appearance (empty if str holds no token)

std::vector<std::string> tokenize_str(const std::string & str,
                                      const std::string & delims=", \t")
{
  std::vector<std::string> tokens;

  // 'start' is always the first character of the next token, or npos once
  // only delimiters (or nothing) remain.
  std::string::size_type start = str.find_first_not_of(delims, 0);
  while (start != std::string::npos)
    {
      // End of this token: the next delimiter, or npos for "rest of string".
      const std::string::size_type stop = str.find_first_of(delims, start);
      tokens.push_back(str.substr(start, stop - start));
      // Skip the delimiter run that follows (yields npos when stop is npos).
      start = str.find_first_not_of(delims, stop);
    }

  return tokens;
}

//this code was taken from WINE (LGPL)
//http://google.com/codesearch?hl=en&q=+lang:c+splitpath+show:CPvw9Z-euls:_RSotQzmLeU:KGzljMEYFbY&sa=N&cd=9&ct=rc&cs_p=http://gentoo.osuosl.org/distfiles/Wine-20050524.tar.gz&cs_f=wine-20050524/programs/winefile/splitpath.c
//
//Splits a path into drive ("X:"), directory (up to and including the last '\\' or
//'/'), base name and extension (starting at the last '.' of the final component).
//Each output pointer may be null to skip that part; non-null outputs must be large
//enough for their piece plus a terminating NUL. Both '\\' and '/' are accepted as
//separators. The extension output receives everything from the dot to the end of
//the input string.
void splitpath(const char* path, char* drv, char* dir, char* name, char* ext)
{
      /* drive: any character followed by ':' */
      const bool hasDrive = path[0] && path[1]==':';
      if (drv) {
            if (hasDrive) {
                  drv[0] = path[0];
                  drv[1] = path[1];
                  drv[2] = '\0';
            } else
                  drv[0] = '\0';
      }
      if (hasDrive)
            path += 2;

      /* stop at the end of the string or at a stream separator */
      const char* stop = path;
      while (*stop && *stop!=':')
            ++stop;

      /* walk back through the last component looking for the extension dot */
      for (const char* scan = stop; scan > path; ) {
            --scan;
            if (*scan=='\\' || *scan=='/')
                  break;
            if (*scan == '.') {
                  stop = scan;
                  break;
            }
      }

      /* the extension runs from 'stop' to the terminating NUL (copied too) */
      if (ext) {
            const char* from = stop;
            while ((*ext = *from++))
                  ++ext;
      }

      /* the base name starts right after the last separator before 'stop' */
      const char* base = stop;
      while (base > path) {
            --base;
            if (*base=='\\' || *base=='/') {
                  ++base;
                  break;
            }
      }

      if (name) {
            for (const char* from = base; from < stop; ++from)
                  *name++ = *from;
            *name = '\0';
      }

      if (dir) {
            for (const char* from = path; from < base; ++from)
                  *dir++ = *from;
            *dir = '\0';
      }
}

//mbg 5/12/08
//for the curious, I tested U16ToHexStr and it was 10x faster than printf.
//so the author of these dedicated functions is not insane, and I will leave them.

//Scratch buffer shared by the U8/U16/U32 To{Dec,Hex}Str helpers below that do not
//take a destination argument: each of them writes its result here and returns a
//pointer to it, so a returned string is only valid until the next call to any of
//them. 11 bytes hold the longest result, the 10 digits written by U32ToDecStr plus
//the terminating NUL.
static char TempArray[11];

//Parses up to four hexadecimal digits (either case) from the start of s.
//
//- four hex digits: returns their value and sets valid to true;
//- a character that is not a hex digit among the first four: sets valid to false
//  and returns 0xFFFF;
//- the string ends before four digits were read: returns the value of the digits
//  read so far and leaves valid untouched.
//  NOTE(review): in that last case the caller sees whatever valid held before the
//  call - confirm whether that is intended.
uint16 FastStrToU16(char* s, bool& valid)
{
      uint16 v=0;
      for(int i=0; i < 4; i++)
      {
            const char c = s[i];
            if(c == 0) return v;

            int digit;
            if(c >= '0' && c <= '9')
                  digit = c-'0';
            else if(c >= 'a' && c <= 'f')
                  digit = c-'a'+10;
            else if(c >= 'A' && c <= 'F')
                  digit = c-'A'+10;
            else
            {
                  valid = false;
                  return 0xFFFF;
            }

            v<<=4;
            v+=digit;
      }
      valid = true;
      return v;
}

//Formats a as exactly three decimal digits (zero padded) into the shared
//TempArray buffer and returns a pointer to that buffer.
char *U8ToDecStr(uint8 a)
{
      // peel digits off the low end, filling the buffer right to left
      uint8 rest = a;
      for(int pos = 2; pos > 0; --pos)
      {
            TempArray[pos] = '0' + rest%10;
            rest /= 10;
      }
      TempArray[0] = '0' + rest; // what is left is the hundreds digit
      TempArray[3] = 0;
      return TempArray;
}

//Formats a as exactly five decimal digits (zero padded) into the shared
//TempArray buffer and returns a pointer to that buffer.
char *U16ToDecStr(uint16 a)
{
      // peel digits off the low end, filling the buffer right to left
      uint16 rest = a;
      for(int pos = 4; pos > 0; --pos)
      {
            TempArray[pos] = '0' + rest%10;
            rest /= 10;
      }
      TempArray[0] = '0' + rest; // what is left is the ten-thousands digit
      TempArray[5] = 0;
      return TempArray;
}

char *U32ToDecStr(char* buf, uint32 a)
{
      buf[0] = '0' + a/1000000000;
      buf[1] = '0' + (a%1000000000)/100000000;
      buf[2] = '0' + (a%100000000)/10000000;
      buf[3] = '0' + (a%10000000)/1000000;
      buf[4] = '0' + (a%1000000)/100000;
      buf[5] = '0' + (a%100000)/10000;
      buf[6] = '0' + (a%10000)/1000;
      buf[7] = '0' + (a%1000)/100;
      buf[8] = '0' + (a%100)/10;
      buf[9] = '0' + (a%10);
      buf[10] = 0;
      return buf;
}
char *U32ToDecStr(uint32 a)
{
      return U32ToDecStr(TempArray,a);
}

//Formats a as exactly four upper-case hexadecimal digits into the shared
//TempArray buffer and returns a pointer to that buffer.
//(assumes uint16 is 16 bits wide, so the leading quotient a/4096 is at most 15)
char *U16ToHexStr(uint16 a)
{
      static const char hexDigits[] = "0123456789ABCDEF";
      TempArray[0] = hexDigits[a/4096];
      TempArray[1] = hexDigits[(a%4096)/256];
      TempArray[2] = hexDigits[(a%256)/16];
      TempArray[3] = hexDigits[a%16];
      TempArray[4] = 0;
      return TempArray;
}

//Formats a as exactly two upper-case hexadecimal digits into the shared
//TempArray buffer and returns a pointer to that buffer.
//(assumes uint8 is 8 bits wide, so the leading quotient a/16 is at most 15)
char *U8ToHexStr(uint8 a)
{
      static const char hexDigits[] = "0123456789ABCDEF";
      TempArray[0] = hexDigits[a/16];
      TempArray[1] = hexDigits[a%16];
      TempArray[2] = 0;
      return TempArray;
}

//Returns the decimal text of n (with a leading '-' for negative values).
std::string stditoa(int n)
{
      char digits[16]; // ample for a 32-bit int: sign, 10 digits and the NUL
      sprintf(digits, "%d", n);
      return std::string(digits);
}


//Reads characters from the stream up to (and consuming) the next NUL byte and
//returns them, without the NUL. If the stream ends or fails before a NUL is seen,
//the characters read so far are returned.
std::string readNullTerminatedAscii(std::istream* is)
{
      std::string ret;
      ret.reserve(50);
      for(;;) 
      {
            const int c = is->get();
            // get() reports end-of-stream/failure with eof(); without this
            // check a stream that lacks the terminator made the loop spin
            // forever, appending garbage until memory ran out.
            if(c == 0 || c == std::char_traits<char>::eof()) break;
            ret += (char)c;
      }
      return ret;
}

// replace all instances of victim with replacement
//
// Occurrences are found left to right in the original text; text inserted by a
// replacement is not searched again. An empty victim matches nothing, so the
// source is returned unchanged.
std::string mass_replace(const std::string &source, const std::string &victim, const std::string &replacement)
{
      std::string answer = source;
      // find("") succeeds at every position, so an empty victim would
      // otherwise insert the replacement forever
      if (victim.empty()) return answer;

      std::string::size_type j = 0;
      while ((j = answer.find(victim, j)) != std::string::npos )
      {
            answer.replace(j, victim.length(), replacement);
            // resume behind the inserted text; restarting at j looped
            // forever whenever replacement itself contained victim
            j += replacement.length();
      }
      return answer;
}

#ifdef WIN32 // this code tends to crash on SDL.
//http://www.codeproject.com/KB/string/UtfConverter.aspx
#include "ConvertUTF.h"
namespace UtfConverter
{
    std::wstring FromUtf8(const std::string& utf8string)
    {
        size_t widesize = utf8string.length();
        if (sizeof(wchar_t) == 2)
        {
            wchar_t* widestringnative = new wchar_t[widesize+1];
            const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
            const UTF8* sourceend = sourcestart + widesize;
            UTF16* targetstart = reinterpret_cast<UTF16*>(widestringnative);
            UTF16* targetend = targetstart + widesize+1;
            ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                delete [] widestringnative;
                throw std::exception();
            }
            *targetstart = 0;
            std::wstring resultstring(widestringnative);
            delete [] widestringnative;
            return resultstring;
        }
        else if (sizeof(wchar_t) == 4) // somewhat pointless as it's always 2 on WIN32, but whatever.
        {
            wchar_t* widestringnative = new wchar_t[widesize];
            const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
            const UTF8* sourceend = sourcestart + widesize;
            UTF32* targetstart = reinterpret_cast<UTF32*>(widestringnative);
            UTF32* targetend = targetstart + widesize;
            ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                delete [] widestringnative;
                throw std::exception();
            }
            *targetstart = 0;
            std::wstring resultstring(widestringnative);
            delete [] widestringnative;
            return resultstring;
        }
        else
        {
            throw std::exception();
        }
        return L"";
    }

    std::string ToUtf8(const std::wstring& widestring)
    {
        size_t widesize = widestring.length();

        if (sizeof(wchar_t) == 2)
        {
            size_t utf8size = 3 * widesize + 1;
            char* utf8stringnative = new char[utf8size];
            const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
            const UTF16* sourceend = sourcestart + widesize;
            UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative);
            UTF8* targetend = targetstart + utf8size;
            ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                delete [] utf8stringnative;
                throw std::exception();
            }
            *targetstart = 0;
            std::string resultstring(utf8stringnative);
            delete [] utf8stringnative;
            return resultstring;
        }
        else if (sizeof(wchar_t) == 4) // again, sizeof(wchar_t) == 2 in win32
        {
            size_t utf8size = 4 * widesize + 1;
            char* utf8stringnative = new char[utf8size];
            const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
            const UTF32* sourceend = sourcestart + widesize;
            UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative);
            UTF8* targetend = targetstart + utf8size;
            ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
            if (res != conversionOK)
            {
                delete [] utf8stringnative;
                throw std::exception();
            }
            *targetstart = 0;
            std::string resultstring(utf8stringnative);
            delete [] utf8stringnative;
            return resultstring;
        }
        else
        {
            throw std::exception();
        }
        return "";
    }
}
#else
namespace UtfConverter
{
    /// Appends the UTF-8 byte sequence for code point n to result
    /// (1 byte below 0x80, 2 below 0x800, 3 below 0x10000, otherwise 4).
    void SeqValue(std::string& result, unsigned n)
    {
        if(n < 0x80)                 // <=7 bits
        {
            result += (char)n;
            return;
        }
        if(n < 0x800)                // <=11 bits
        {
            result += (char)(0xC0 + (n>>6));
            result += (char)(0x80 + (n&63));
            return;
        }
        if(n < 0x10000)              // <=16 bits
        {
            result += (char)(0xE0 + (n>>12));
        }
        else                         // <=21 bits
        {
            result += (char)(0xF0 + (n>>18));
            result += (char)(0x80 + ((n>>12)&63));
        }
        result += (char)(0x80 + ((n>>6)&63));
        result += (char)(0x80 + (n&63));
    }

    /// Decodes one UTF-8 sequence starting at input[pos] and advances pos past it.
    /// On a stray continuation byte, a truncated sequence, a bad continuation byte
    /// or an overlong encoding, pos advances by one byte only and '?' is returned.
    unsigned DecData(const std::string& input, size_t& pos)
    {
        // sequence length by the head byte's high nibble (0 = continuation byte)
        static const char     lengths[16] = { 1,1,1,1,1,1,1,1, 0,0,0,0,2,2,3,4 };
        // smallest code point that legitimately needs a sequence of that length
        static const unsigned smallest[4] = { 0, 0x80, 0x800, 0x10000 };
        // payload bits carried by the head byte
        static const char     headmask[4] = { 0x7F, 0x1F, 0x0F, 0x07 };

        const unsigned char head = input[pos];
        const unsigned count = lengths[head >> 4];
        if(count < 1 || pos+count > input.size())
        {
            ++pos;
            return '?';
        }

        // head bits first, then six bits from each continuation byte
        unsigned value = head & headmask[count-1];
        for(unsigned k = 1; k < count; ++k)
        {
            const unsigned char cont = input[pos+k];
            if((cont & 0xC0) != 0x80)
            {
                ++pos;
                return '?';
            }
            value = (value << 6) | (cont & 0x3F);
        }

        if(value < smallest[count-1])  // overlong encoding
        {
            ++pos;
            return '?';
        }
        pos += count;
        return value;
    }


    /// Decodes a whole UTF-8 string, one wide character per decoded sequence.
    std::wstring FromUtf8(std::string& input) // string -> wstring
    {
        std::wstring wide;
        std::string::size_type cursor = 0;
        while(cursor < input.size())
            wide += (wchar_t)DecData(input, cursor);
        return wide;
    }
    
    /// Encodes every wide character of input as UTF-8.
    std::string ToUtf8(std::wstring& input) // wstring -> string
    {
        std::string bytes;
        for(std::wstring::const_iterator it = input.begin(); it != input.end(); ++it)
            SeqValue(bytes, *it);
        return bytes;
    }
}
#endif

  
//convert a UTF-8 encoded std::string to std::wstring
//(the wide encoding is whatever UtfConverter::FromUtf8 produces for this platform)
//If the converter throws a std::exception, the literal text
//"(failed UTF-8 conversion)" is returned instead.
std::wstring mbstowcs(std::string str)
{
      try {
            return UtfConverter::FromUtf8(str);
      } catch(const std::exception&) { // by reference: no copy, no slicing of derived exceptions
            return L"(failed UTF-8 conversion)";
      }
}

//convert a std::wstring to a UTF-8 encoded std::string
//(thin wrapper around UtfConverter::ToUtf8; anything that throws propagates to the caller)
std::string wcstombs(std::wstring str) // UTF32->UTF8
{
      std::string utf8 = UtfConverter::ToUtf8(str);
      return utf8;
}


//TODO - don't we already have another function that can do this?
//
//Returns the lower-cased text after the last '.' in input, or "" when input
//contains no '.' (or is null). The search covers the whole string, so a dot in a
//directory name counts when the file name itself has none.
std::string getExtension(const char* input) {
      if(!input)
            return "";
      const char* dot=strrchr(input,'.');
      if(!dot)
            return "";
      // Built directly as a std::string: the former fixed 1024/512 byte stack
      // buffers were filled with unbounded strcpy() and overflowed on long paths.
      std::string ext(dot+1);
      for(std::string::size_type k=0;k<ext.size();k++)
            ext[k]=(char)tolower((unsigned char)ext[k]); // cast: tolower() on a negative char is undefined
      return ext;
}

// Listing generated by Doxygen 1.6.0