Samchon Framework for CPP  1.0.0
WeakString.hpp
1 #pragma once
2 
3 #include <string>
4 #include <vector>
5 #include <list>
6 #include <queue>
7 #include <algorithm>
8 #include <samchon/IndexPair.hpp>
9 
10 #include <samchon/library/IOperator.hpp>
11 #include <samchon/library/Math.hpp>
12 
13 namespace samchon
14 {
35  class WeakString
36  {
37  private:
41  static const std::vector<std::string> SPACE_ARRAY;
42 
43  public:
56  static const size_t npos = -1;
57 
58  private:
62  const char *data_;
63 
67  size_t size_;
68 
69  public:
70  /* --------------------------------------------------------------------
71  CONSTRUCTORS
72  -------------------------------------------------------------------- */
78  {
79  this->data_ = nullptr;
80  this->size_ = 0;
81  };
82 
102  WeakString(const char *data, size_t size)
103  {
104  this->data_ = data;
105  this->size_ = size;
106  };
107 
127  WeakString(const char *begin, const char *end)
128  : WeakString(begin, end - begin)
129  {
130  };
131 
139  WeakString(const char *data)
140  {
141  this->data_ = data;
142 
143  if (data == nullptr)
144  this->size_ = 0;
145  else
146  this->size_ = std::char_traits<char>::length(data);
147  };
148 
156  WeakString(const char &ch)
157  {
158  this->data_ = &ch;
159  this->size_ = 1;
160  };
161 
169  WeakString(std::initializer_list<char> &il)
170  {
171  if (il.size() == 0)
172  this->data_ = nullptr;
173  else
174  this->data_ = il.begin();
175 
176  this->size_ = il.size();
177  };
178 
186  WeakString(const std::string &str)
187  {
188  this->data_ = str.data();
189  this->size_ = str.size();
190  };
191 
192  /* --------------------------------------------------------------------
193  ELEMENT ACCESSORS
194  -------------------------------------------------------------------- */
206  auto data() const -> const char*
207  {
208  return data_;
209  };
210 
217  auto size() const -> size_t
218  {
219  return size_;
220  };
221 
231  auto empty() const -> bool
232  {
233  return data_ == nullptr || size_ == 0;
234  };
235 
242  auto at(size_t index) const -> const char&
243  {
244  if (index > size_)
245  throw std::out_of_range("out of range.");
246 
247  return *(data_ + index);
248  };
249 
253  auto operator[](size_t index) const -> const char&
254  {
255  return *(data_ + index);
256  };
257 
258  /* --------------------------------------------------------------------
259  FINDERS
260  -------------------------------------------------------------------- */
273  auto find(const WeakString &delim, size_t startIndex = NULL) const -> size_t
274  {
275  size_t j = 0;
276 
277  for (size_t i = startIndex; i < size_; i++)
278  if (data_[i] != delim[j++])
279  j = 0;
280  else if (j == delim.size())
281  return i - delim.size() + 1;
282 
283  return npos;
284  };
285 
297  auto rfind(const WeakString &delim, size_t endIndex = SIZE_MAX) const -> size_t
298  {
299  if (empty() == true || endIndex == 0)
300  return npos;
301 
302  size_t j = delim.size() - 1;
303 
304  for (long long i = std::min<size_t>(endIndex - 1, size_ - 1); i >= 0; i--)
305  if (data_[(size_t)i] != delim[j]) //NOT MATCHED
306  j = delim.size() - 1;
307  else if (j == 0) //FULLY MATCHED
308  return (size_t)i;
309  else //PARTIALLY MATCHED,
310  j--;
311 
312  return npos;
313  };
314 
330  auto finds(const std::vector<std::string> &delims, size_t startIndex = 0) const -> IndexPair<WeakString>
331  {
332  std::vector<WeakString> wdelims(delims.size());
333  for (size_t i = 0; i < delims.size(); i++)
334  wdelims[i] = delims[i];
335 
336  return finds(wdelims, startIndex);
337  };
338 
339  auto finds(const std::vector<WeakString> &delims, size_t startIndex = 0) const -> IndexPair<WeakString>
340  {
341  std::vector<size_t> positionVector;
342  positionVector.reserve(delims.size());
343 
344  for (size_t i = 0; i < delims.size(); i++)
345  positionVector.push_back(find(delims[i], startIndex));
346 
347  IndexPair<size_t> &iPair = library::Math::minimum(positionVector);
348  return { iPair.get_index(), delims[iPair.getValue()] };
349  };
350 
366  auto rfinds(const std::vector<std::string> &delims, size_t endIndex = SIZE_MAX) const -> IndexPair<WeakString>
367  {
368  std::vector<WeakString> wdelims(delims.size());
369  for (size_t i = 0; i < delims.size(); i++)
370  wdelims[i] = delims[i];
371 
372  return rfinds(wdelims, endIndex);
373  };
374 
375  auto rfinds(const std::vector<WeakString> &delims, size_t endIndex = SIZE_MAX) const -> IndexPair<WeakString>
376  {
377  std::vector<size_t> positionVector;
378  positionVector.reserve(delims.size());
379 
380  size_t position;
381 
382  for (size_t i = 0; i < delims.size(); i++)
383  {
384  position = rfind(delims[i], endIndex);
385 
386  if (position != std::string::npos)
387  positionVector.push_back(position);
388  }
389 
390  if (positionVector.empty() == true)
391  return { std::string::npos, WeakString() };
392 
393  IndexPair<size_t> &iPair = library::Math::maximum(positionVector);
394  return { iPair.get_index(), delims[iPair.getValue()] };
395  };
396 
397  /* --------------------------------------------------------------------
398  EXTRACTORS
399  -------------------------------------------------------------------- */
419  auto substr(size_t startIndex, size_t size = SIZE_MAX) const -> WeakString
420  {
421  if (size > size_ || startIndex + size > size_)
422  size = size_ - startIndex;
423 
424  return WeakString(data_ + startIndex, size);
425  };
426 
443  auto substring(size_t startIndex, size_t endIndex = SIZE_MAX) const -> WeakString
444  {
445  if (startIndex > endIndex)
446  std::swap(startIndex, endIndex);
447 
448  if (startIndex == endIndex || startIndex > size_ - 1)
449  return WeakString();
450 
451  if (endIndex > size_)
452  endIndex = size_;
453 
454  return WeakString(data_ + startIndex, data_ + endIndex);
455  };
456 
475  auto between(const WeakString &start = {}, const WeakString &end = {}) const -> WeakString
476  {
477  if (start.empty() == true && end.empty() == true)
478  return *this;
479  else if (start.empty() == true)
480  return substr(0, find(end));
481  else if (end.empty() == true)
482  return substr(find(start) + start.size());
483  else
484  {
485  size_t startIndex = find(start);
486 
487  return substring
488  (
489  startIndex + start.size(),
490  find(end, startIndex + start.size())
491  );
492  }
493  };
494 
502  auto split(const WeakString &delim) const -> std::vector<WeakString>
503  {
504  size_t startIndex = 0;
505  size_t x;
506 
507  //CONSTRUCT THE LIST OF QUOTES
508  std::queue<std::pair<size_t, size_t>> quoteList;
509  while ((x = find(delim, startIndex)) != npos)
510  {
511  quoteList.push({ startIndex, x });
512  startIndex = x + delim.size();
513  }
514  quoteList.push({ startIndex, size() });
515 
516  //ASSIGN THE STRING_VECTOR BY SUBSTRING
517  std::vector<WeakString> vec;
518  vec.reserve(quoteList.size());
519 
520  while (quoteList.empty() == false)
521  {
522  vec.push_back(substring(quoteList.front().first, quoteList.front().second));
523  quoteList.pop();
524  }
525  return vec;
526  };
527 
548  auto betweens(const WeakString &start = {}, const WeakString &end = {}) const -> std::vector<WeakString>
549  {
550  std::vector<WeakString> vec;
551 
552  if (start.empty() == true && end.empty() == true)
553  return vec;
554  else if (start == end) //NOT EMPTY, BUT EQUALS
555  {
556  std::queue<std::pair<size_t, size_t>> quoteList;
557 
558  size_t x, prevX = -1, n = 0;
559  while ((x = find(start, prevX + 1)) != npos)
560  {
561  if (++n % 2 == 0) //WHEN THE MATCHED NUMBER IS EVEN
562  quoteList.push({ prevX, x });
563  prevX = x;
564  }
565 
566  if (quoteList.size() == 0)
567  vec.push_back(*this);
568  else
569  {
570  vec.reserve(quoteList.size());
571  while (quoteList.empty() == false)
572  {
573  std::pair<size_t, size_t> &quote = quoteList.front();
574  vec.push_back(substring(quote.first + start.size()));
575 
576  quoteList.pop();
577  }
578  }
579  }
580  else //BEGIN AND END IS DIFFER
581  {
582  vec = split(start);
583  vec.erase(vec.begin());
584 
585  if (end.empty() == false)
586  for (long long i = (long long)vec.size() - 1; i >= 0; i--)
587  if (vec.at((size_t)i).find(end) == npos)
588  vec.erase(vec.begin() + (size_t)i);
589  else
590  vec[(size_t)i] = vec[(size_t)i].between("", end);
591  }
592  return vec;
593  };
594 
595  /* --------------------------------------------------------------------
596  TRIMS
597  -------------------------------------------------------------------- */
598  auto trim() const -> WeakString
599  {
600  return ltrim().rtrim();
601  };
602  auto ltrim() const -> WeakString
603  {
604  static const std::vector<std::string> SPACE_ARRAY = { " ", "\t", "\r", "\n" };
605 
606  return ltrim(SPACE_ARRAY);
607  };
608  auto rtrim() const -> WeakString
609  {
610  static const std::vector<std::string> SPACE_ARRAY = { " ", "\t", "\r", "\n" };
611 
612  return rtrim(SPACE_ARRAY);
613  };
614 
615  auto trim(const WeakString &delim) const -> WeakString
616  {
617  return ltrim(delim).rtrim(delim);
618  };
619  auto ltrim(const WeakString &delim) const -> WeakString
620  {
621  return ltrim(std::vector<WeakString>({delim}));
622  };
623  auto rtrim(const WeakString &delim) const -> WeakString
624  {
625  return rtrim(std::vector<WeakString>({ delim }));
626  };
627 
634  auto trim(const std::vector<std::string> &delims) const -> WeakString
635  {
636  return ltrim(delims).rtrim(delims);
637  };
638 
645  auto ltrim(const std::vector<std::string> &delims) const -> WeakString
646  {
647  std::vector<WeakString> wdelims(delims.size());
648  for (size_t i = 0; i < delims.size(); i++)
649  wdelims[i] = delims[i];
650 
651  return ltrim(wdelims);
652  };
653 
660  auto rtrim(const std::vector<std::string> &delims) const -> WeakString
661  {
662  std::vector<WeakString> wdelims(delims.size());
663  for (size_t i = 0; i < delims.size(); i++)
664  wdelims[i] = delims[i];
665 
666  return rtrim(wdelims);
667  };
668 
669  auto trim(const std::vector<WeakString> &delims) const -> WeakString
670  {
671  return ltrim(delims).rtrim(delims);
672  };
673  auto ltrim(const std::vector<WeakString> &delims) const -> WeakString
674  {
675  WeakString str(data_, size_);
676  IndexPair<size_t> indexPair = { 0, 0 };
677 
678  while (str.empty() == false)
679  {
680  std::vector<size_t> indexVec;
681  indexVec.reserve(delims.size());
682 
683  for (size_t i = 0; i < delims.size(); i++)
684  indexVec.push_back(str.find(delims[i]));
685 
686  indexPair = library::Math::minimum(indexVec);
687  if (indexPair.getValue() == 0)
688  {
689  size_t size = delims[indexPair.get_index()].size();
690 
691  str.data_ += size;
692  str.size_ -= size;
693  }
694  else
695  break;
696  }
697 
698  return str;
699  };
700  auto rtrim(const std::vector<WeakString> &delims) const -> WeakString
701  {
702  WeakString str(data_, size_);
703  IndexPair<size_t> pairIndex;
704 
705  while (str.empty() == false)
706  {
707  std::vector<size_t> indexVec;
708  indexVec.reserve(delims.size());
709 
710  for (size_t i = 0; i < delims.size(); i++)
711  {
712  size_t index = str.rfind(delims[i]);
713  if (index != npos)
714  indexVec.push_back(index);
715  }
716  if (indexVec.empty() == true)
717  break;
718 
719  pairIndex = library::Math::maximum(indexVec);
720  size_t size = delims[pairIndex.get_index()].size();
721 
722  if (pairIndex.getValue() == str.size() - size)
723  str.size_ -= size;
724  else
725  break;
726  }
727  return str;
728  };
729 
730  /* --------------------------------------------------------------------
731  REPLACERS
732  -------------------------------------------------------------------- */
740  auto replace(const WeakString &before, const WeakString &after) const -> std::string
741  {
742  size_t index = find(before);
743  if (index == npos)
744  return str();
745 
746  std::string str;
747  str.reserve(size() - before.size() + after.size());
748 
749  str.append(substr(0, index).str());
750  str.append(after.str());
751  str.append(substr(index + before.size()).str());
752 
753  return str;
754  };
755 
763  auto replaceAll(const WeakString &before, const WeakString &after) const -> std::string
764  {
765  return replaceAll({ { before, after } });
766  };
767 
775  auto replaceAll(const std::vector<std::pair<std::string, std::string>> &pairs) const -> std::string
776  {
777  std::vector<std::pair<WeakString, WeakString>> wPairs(pairs.size());
778  for (size_t i = 0; i < pairs.size(); i++)
779  wPairs[i] = { pairs[i].first, pairs[i].second };
780 
781  return replaceAll(wPairs);
782  };
783 
784  auto replaceAll(const std::vector<std::pair<WeakString, WeakString>> &pairs) const -> std::string
785  {
786  if (pairs.empty() == true)
787  return this->str();
788 
789  std::list<std::pair<size_t, size_t>> foundPairList;
790  //1ST IS STR-INDEX FROM FIND
791  //2ND IS PAIR-INDEX
792 
793  size_t size = this->size();
794  size_t index;
795  size_t i;
796 
797  //FIND POSITION-INDEX IN ORIGINAL STRING
798  for (i = 0; i < pairs.size(); i++)
799  {
800  index = 0;
801 
802  while (true)
803  {
804  index = find(pairs[i].first, index);
805  if (index == npos)
806  break;
807 
808  size -= pairs[i].first.size();
809  size += pairs[i].second.size();
810 
811  foundPairList.push_back({ index++, i });
812  }
813  }
814 
815  if (foundPairList.empty() == true)
816  return str();
817 
818  foundPairList.sort();
819 
820  //REPLACE
821  std::string str;
822  str.reserve((size_t)size);
823 
824  index = 0;
825 
826  while (foundPairList.empty() == false)
827  {
828  auto it = foundPairList.begin();
829  auto &before = pairs[it->second].first;
830  auto &after = pairs[it->second].second;
831 
832  str.append(substring(index, it->first).str());
833  str.append(after.str());
834 
835  index = it->first + before.size();
836  foundPairList.pop_front();
837  }
838  if (index <= this->size() - 1)
839  str.append(substr(index).str());
840 
841  return str;
842  };
843 
851  auto toLowerCase() const -> std::string
852  {
853  std::string &str = this->str();
854 
855  for (size_t i = 0; i < str.size(); i++)
856  if ('A' <= str[i] && str[i] <= 'Z')
857  str[i] = tolower(str[i]);
858 
859  return str;
860  };
861 
869  auto yoUpperCase() const -> std::string
870  {
871  std::string &str = this->str();
872 
873  for (size_t i = 0; i < str.size(); i++)
874  if ('a' <= str[i] && str[i] <= 'z')
875  str[i] = toupper(str[i]);
876 
877  return str;
878  };
879 
880  /* --------------------------------------------------------------------
881  COMPARISONS
882  -------------------------------------------------------------------- */
883  auto operator==(const WeakString &str) const -> bool
884  {
885  if (size_ != str.size_)
886  return false;
887  else if (data_ == str.data_)
888  return true;
889 
890  for (size_t i = 0; i < size(); i++)
891  if (data_[i] != str.data_[i])
892  return false;
893 
894  return true;
895  };
896 
897  auto operator<(const WeakString &str) const -> bool
898  {
899  size_t minSize = std::min<size_t>(size(), str.size());
900 
901  for (size_t i = 0; i < minSize; i++)
902  if (this->at(i) == str[i])
903  continue;
904  else if (this->at(i) < str[i])
905  return true;
906  else
907  return false;
908 
909  if (this->size() == minSize && this->size() != str.size())
910  return true;
911  else
912  return false;
913  };
914 
915  OPERATOR_METHODS_INLINE(WeakString)
916 
917  /* --------------------------------------------------------------------
918  CONVERSIONS
919  -------------------------------------------------------------------- */
926  auto str() const -> std::string
927  {
928  return std::string(data_, data_ + size_);
929  };
930 
931  operator std::string()
932  {
933  return str();
934  };
935  };
936 };
auto data() const -> const char *
Get string data; referenced characters.
Definition: WeakString.hpp:206
auto str() const -> std::string
Get the string content.
Definition: WeakString.hpp:926
WeakString(const std::string &str)
Constructor by string.
Definition: WeakString.hpp:186
WeakString(const char &ch)
Constructor by a single character.
Definition: WeakString.hpp:156
auto empty() const -> bool
Tests whether string is empty.
Definition: WeakString.hpp:231
auto rfinds(const std::vector< std::string > &delims, size_t endIndex=SIZE_MAX) const -> IndexPair< WeakString >
Finds last occurrence in string.
Definition: WeakString.hpp:366
auto at(size_t index) const -> const char &
Get character of string. Returns a const reference to the character at the specified position...
Definition: WeakString.hpp:242
WeakString(const char *data)
Constructor by characters.
Definition: WeakString.hpp:139
auto getValue() -> T &
Get reference of value.
Definition: IndexPair.hpp:92
WeakString()
Default Constructor does not reference any character.
Definition: WeakString.hpp:77
auto replaceAll(const WeakString &before, const WeakString &after) const -> std::string
Returns a string specified word is replaced.
Definition: WeakString.hpp:763
auto replace(const WeakString &before, const WeakString &after) const -> std::string
Replace portion of string once.
Definition: WeakString.hpp:740
auto toLowerCase() const -> std::string
Convert uppercase letters to lowercase.
Definition: WeakString.hpp:851
auto betweens(const WeakString &start={}, const WeakString &end={}) const -> std::vector< WeakString >
Generates substrings.
Definition: WeakString.hpp:548
WeakString(std::initializer_list< char > &il)
Constructor by a initializer list.
Definition: WeakString.hpp:169
auto finds(const std::vector< std::string > &delims, size_t startIndex=0) const -> IndexPair< WeakString >
Finds first occurrence in string.
Definition: WeakString.hpp:330
static const std::vector< std::string > SPACE_ARRAY
An array containing whitespaces.
Definition: WeakString.hpp:41
static const size_t npos
Maximum value for size_t.
Definition: WeakString.hpp:56
const char * data_
Pointer to the beginning position of the referenced characters.
Definition: WeakString.hpp:62
auto replaceAll(const std::vector< std::pair< std::string, std::string >> &pairs) const -> std::string
Returns a string specified words are replaced.
Definition: WeakString.hpp:775
static auto minimum(const _Cont &container) -> IndexPair< T >
Calculate minimum value with its index.
Definition: Math.hpp:177
WeakString(const char *data, size_t size)
Constructor by characters with specified size.
Definition: WeakString.hpp:102
auto between(const WeakString &start={}, const WeakString &end={}) const -> WeakString
Generates a substring.
Definition: WeakString.hpp:475
auto substring(size_t startIndex, size_t endIndex=SIZE_MAX) const -> WeakString
Generates a substring.
Definition: WeakString.hpp:443
auto trim(const std::vector< std::string > &delims) const -> WeakString
Removes all designated characters from the beginning and end of the specified string.
Definition: WeakString.hpp:634
auto find(const WeakString &delim, size_t startIndex=NULL) const -> size_t
Finds first occurrence in string.
Definition: WeakString.hpp:273
size_t size_
(Specified) size of referenced characters
Definition: WeakString.hpp:67
A pair of index and its value(T)
Definition: IndexPair.hpp:30
auto ltrim(const std::vector< std::string > &delims) const -> WeakString
Removes all designated characters from the beginning of the specified string.
Definition: WeakString.hpp:645
auto substr(size_t startIndex, size_t size=SIZE_MAX) const -> WeakString
Generates a substring.
Definition: WeakString.hpp:419
static auto maximum(const _Cont &container) -> IndexPair< T >
Calculate maximum value with its index.
Definition: Math.hpp:208
auto rtrim(const std::vector< std::string > &delims) const -> WeakString
Removes all designated characters from the end of the specified string.
Definition: WeakString.hpp:660
auto operator[](size_t index) const -> const char &
Get character of string. Returns a const reference to the character at the specified position...
Definition: WeakString.hpp:253
auto yoUpperCase() const -> std::string
Convert lowercase letters to uppercase.
Definition: WeakString.hpp:869
WeakString(const char *begin, const char *end)
Constructor by characters of begin and end.
Definition: WeakString.hpp:127
auto split(const WeakString &delim) const -> std::vector< WeakString >
Generates substrings.
Definition: WeakString.hpp:502
auto size() const -> size_t
Returns size of the characters which are being referenced.
Definition: WeakString.hpp:217
auto rfind(const WeakString &delim, size_t endIndex=SIZE_MAX) const -> size_t
Finds last occurrence in string.
Definition: WeakString.hpp:297
auto get_index() const -> size_t
Get index.
Definition: IndexPair.hpp:82
A string class that only references characters; it does not own them.
Definition: WeakString.hpp:35