libstdc++
codecvt_specializations.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2017 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 
31 /** @file ext/codecvt_specializations.h
32  * This file is a GNU extension to the Standard C++ Library.
33  */
34 
35 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
36 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
37 
38 #include <bits/c++config.h>
39 #include <locale>
40 #include <iconv.h>
41 
42 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_CXX11
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47  /// Extension to use iconv for dealing with character encodings.
48  // This includes conversions and comparisons between various character
49  // sets. This object encapsulates data that may need to be shared between
50  // char_traits, codecvt and ctype.
52  {
53  public:
54  // Types:
55  // NB: A conversion descriptor subsumes and enhances the
56  // functionality of a simple state type such as mbstate_t.
57  typedef iconv_t descriptor_type;
58 
59  protected:
60  // Name of internal character set encoding.
61  std::string _M_int_enc;
62 
63  // Name of external character set encoding.
64  std::string _M_ext_enc;
65 
66  // Conversion descriptor between external encoding to internal encoding.
67  descriptor_type _M_in_desc;
68 
69  // Conversion descriptor between internal encoding to external encoding.
70  descriptor_type _M_out_desc;
71 
72  // The byte-order marker for the external encoding, if necessary.
73  int _M_ext_bom;
74 
75  // The byte-order marker for the internal encoding, if necessary.
76  int _M_int_bom;
77 
78  // Number of external bytes needed to construct one complete
79  // character in the internal encoding.
80  // NB: -1 indicates variable, or stateful, encodings.
81  int _M_bytes;
82 
83  public:
84  explicit
86  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
87  { }
88 
89  explicit
90  encoding_state(const char* __int, const char* __ext,
91  int __ibom = 0, int __ebom = 0, int __bytes = 1)
92  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
93  _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
94  { init(); }
95 
96  // 21.1.2 traits typedefs
97  // p4
98  // typedef STATE_T state_type
99  // requires: state_type shall meet the requirements of
100  // CopyConstructible types (20.1.3)
101  // NB: This does not preserve the actual state of the conversion
102  // descriptor member, but it does duplicate the encoding
103  // information.
104  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
105  { construct(__obj); }
106 
107  // Need assignment operator as well.
109  operator=(const encoding_state& __obj)
110  {
111  construct(__obj);
112  return *this;
113  }
114 
115  ~encoding_state()
116  { destroy(); }
117 
118  bool
119  good() const throw()
120  {
121  const descriptor_type __err = (iconv_t)(-1);
122  bool __test = _M_in_desc && _M_in_desc != __err;
123  __test &= _M_out_desc && _M_out_desc != __err;
124  return __test;
125  }
126 
127  int
128  character_ratio() const
129  { return _M_bytes; }
130 
131  const std::string
132  internal_encoding() const
133  { return _M_int_enc; }
134 
135  int
136  internal_bom() const
137  { return _M_int_bom; }
138 
139  const std::string
140  external_encoding() const
141  { return _M_ext_enc; }
142 
143  int
144  external_bom() const
145  { return _M_ext_bom; }
146 
147  const descriptor_type&
148  in_descriptor() const
149  { return _M_in_desc; }
150 
151  const descriptor_type&
152  out_descriptor() const
153  { return _M_out_desc; }
154 
155  protected:
156  void
157  init()
158  {
159  const descriptor_type __err = (iconv_t)(-1);
160  const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
161  if (!_M_in_desc && __have_encodings)
162  {
163  _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
164  if (_M_in_desc == __err)
165  std::__throw_runtime_error(__N("encoding_state::_M_init "
166  "creating iconv input descriptor failed"));
167  }
168  if (!_M_out_desc && __have_encodings)
169  {
170  _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
171  if (_M_out_desc == __err)
172  std::__throw_runtime_error(__N("encoding_state::_M_init "
173  "creating iconv output descriptor failed"));
174  }
175  }
176 
177  void
178  construct(const encoding_state& __obj)
179  {
180  destroy();
181  _M_int_enc = __obj._M_int_enc;
182  _M_ext_enc = __obj._M_ext_enc;
183  _M_ext_bom = __obj._M_ext_bom;
184  _M_int_bom = __obj._M_int_bom;
185  _M_bytes = __obj._M_bytes;
186  init();
187  }
188 
189  void
190  destroy() throw()
191  {
192  const descriptor_type __err = (iconv_t)(-1);
193  if (_M_in_desc && _M_in_desc != __err)
194  {
195  iconv_close(_M_in_desc);
196  _M_in_desc = 0;
197  }
198  if (_M_out_desc && _M_out_desc != __err)
199  {
200  iconv_close(_M_out_desc);
201  _M_out_desc = 0;
202  }
203  }
204  };
205 
206  /// encoding_char_traits
207  // Custom traits type with encoding_state for the state type, and the
208  // associated fpos<encoding_state> for the position type, all other
209  // bits equivalent to the required char_traits instantiations.
210  template<typename _CharT>
212  : public std::char_traits<_CharT>
213  {
214  typedef encoding_state state_type;
215  typedef typename std::fpos<state_type> pos_type;
216  };
217 
218 _GLIBCXX_END_NAMESPACE_VERSION
219 _GLIBCXX_END_NAMESPACE_CXX11
220 } // namespace
221 
222 
223 namespace std _GLIBCXX_VISIBILITY(default)
224 {
225 _GLIBCXX_BEGIN_NAMESPACE_VERSION
226 
228 
229  /// codecvt<InternT, _ExternT, encoding_state> specialization.
230  // This partial specialization takes advantage of iconv to provide
231  // code conversions between a large number of character encodings.
232  template<typename _InternT, typename _ExternT>
233  class codecvt<_InternT, _ExternT, encoding_state>
234  : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
235  {
236  public:
237  // Types:
238  typedef codecvt_base::result result;
239  typedef _InternT intern_type;
240  typedef _ExternT extern_type;
242  typedef state_type::descriptor_type descriptor_type;
243 
244  // Data Members:
245  static locale::id id;
246 
247  explicit
248  codecvt(size_t __refs = 0)
250  { }
251 
252  explicit
253  codecvt(state_type& __enc, size_t __refs = 0)
255  { }
256 
257  protected:
258  virtual
259  ~codecvt() { }
260 
261  virtual result
262  do_out(state_type& __state, const intern_type* __from,
263  const intern_type* __from_end, const intern_type*& __from_next,
264  extern_type* __to, extern_type* __to_end,
265  extern_type*& __to_next) const;
266 
267  virtual result
268  do_unshift(state_type& __state, extern_type* __to,
269  extern_type* __to_end, extern_type*& __to_next) const;
270 
271  virtual result
272  do_in(state_type& __state, const extern_type* __from,
273  const extern_type* __from_end, const extern_type*& __from_next,
274  intern_type* __to, intern_type* __to_end,
275  intern_type*& __to_next) const;
276 
277  virtual int
278  do_encoding() const throw();
279 
280  virtual bool
281  do_always_noconv() const throw();
282 
283  virtual int
284  do_length(state_type&, const extern_type* __from,
285  const extern_type* __end, size_t __max) const;
286 
287  virtual int
288  do_max_length() const throw();
289  };
290 
291  template<typename _InternT, typename _ExternT>
292  locale::id
294 
295  // This adaptor works around the signature problems of the second
296  // argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2
297  // uses 'char**', which matches the POSIX 1003.1-2001 standard.
298  // Using this adaptor, g++ will do the work for us.
299  template<typename _Tp>
300  inline size_t
301  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
302  iconv_t __cd, char** __inbuf, size_t* __inbytes,
303  char** __outbuf, size_t* __outbytes)
304  { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }
305 
306  template<typename _InternT, typename _ExternT>
307  codecvt_base::result
309  do_out(state_type& __state, const intern_type* __from,
310  const intern_type* __from_end, const intern_type*& __from_next,
311  extern_type* __to, extern_type* __to_end,
312  extern_type*& __to_next) const
313  {
314  result __ret = codecvt_base::error;
315  if (__state.good())
316  {
317  const descriptor_type& __desc = __state.out_descriptor();
318  const size_t __fmultiple = sizeof(intern_type);
319  size_t __fbytes = __fmultiple * (__from_end - __from);
320  const size_t __tmultiple = sizeof(extern_type);
321  size_t __tbytes = __tmultiple * (__to_end - __to);
322 
323  // Argument list for iconv specifies a byte sequence. Thus,
324  // all to/from arrays must be brutally casted to char*.
325  char* __cto = reinterpret_cast<char*>(__to);
326  char* __cfrom;
327  size_t __conv;
328 
329  // Some encodings need a byte order marker as the first item
330  // in the byte stream, to designate endian-ness. The default
331  // value for the byte order marker is NULL, so if this is
332  // the case, it's not necessary and we can just go on our
333  // merry way.
334  int __int_bom = __state.internal_bom();
335  if (__int_bom)
336  {
337  size_t __size = __from_end - __from;
338  intern_type* __cfixed = static_cast<intern_type*>
339  (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
340  __cfixed[0] = static_cast<intern_type>(__int_bom);
341  char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
342  __cfrom = reinterpret_cast<char*>(__cfixed);
343  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
344  &__fbytes, &__cto, &__tbytes);
345  }
346  else
347  {
348  intern_type* __cfixed = const_cast<intern_type*>(__from);
349  __cfrom = reinterpret_cast<char*>(__cfixed);
350  __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
351  &__cto, &__tbytes);
352  }
353 
354  if (__conv != size_t(-1))
355  {
356  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
357  __to_next = reinterpret_cast<extern_type*>(__cto);
358  __ret = codecvt_base::ok;
359  }
360  else
361  {
362  if (__fbytes < __fmultiple * (__from_end - __from))
363  {
364  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
365  __to_next = reinterpret_cast<extern_type*>(__cto);
366  __ret = codecvt_base::partial;
367  }
368  else
369  __ret = codecvt_base::error;
370  }
371  }
372  return __ret;
373  }
374 
375  template<typename _InternT, typename _ExternT>
376  codecvt_base::result
378  do_unshift(state_type& __state, extern_type* __to,
379  extern_type* __to_end, extern_type*& __to_next) const
380  {
381  result __ret = codecvt_base::error;
382  if (__state.good())
383  {
384  const descriptor_type& __desc = __state.in_descriptor();
385  const size_t __tmultiple = sizeof(intern_type);
386  size_t __tlen = __tmultiple * (__to_end - __to);
387 
388  // Argument list for iconv specifies a byte sequence. Thus,
389  // all to/from arrays must be brutally casted to char*.
390  char* __cto = reinterpret_cast<char*>(__to);
391  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
392  &__cto, &__tlen);
393 
394  if (__conv != size_t(-1))
395  {
396  __to_next = reinterpret_cast<extern_type*>(__cto);
397  if (__tlen == __tmultiple * (__to_end - __to))
398  __ret = codecvt_base::noconv;
399  else if (__tlen == 0)
400  __ret = codecvt_base::ok;
401  else
402  __ret = codecvt_base::partial;
403  }
404  else
405  __ret = codecvt_base::error;
406  }
407  return __ret;
408  }
409 
410  template<typename _InternT, typename _ExternT>
411  codecvt_base::result
412  codecvt<_InternT, _ExternT, encoding_state>::
413  do_in(state_type& __state, const extern_type* __from,
414  const extern_type* __from_end, const extern_type*& __from_next,
415  intern_type* __to, intern_type* __to_end,
416  intern_type*& __to_next) const
417  {
418  result __ret = codecvt_base::error;
419  if (__state.good())
420  {
421  const descriptor_type& __desc = __state.in_descriptor();
422  const size_t __fmultiple = sizeof(extern_type);
423  size_t __flen = __fmultiple * (__from_end - __from);
424  const size_t __tmultiple = sizeof(intern_type);
425  size_t __tlen = __tmultiple * (__to_end - __to);
426 
427  // Argument list for iconv specifies a byte sequence. Thus,
428  // all to/from arrays must be brutally casted to char*.
429  char* __cto = reinterpret_cast<char*>(__to);
430  char* __cfrom;
431  size_t __conv;
432 
433  // Some encodings need a byte order marker as the first item
434  // in the byte stream, to designate endian-ness. The default
435  // value for the byte order marker is NULL, so if this is
436  // the case, it's not necessary and we can just go on our
437  // merry way.
438  int __ext_bom = __state.external_bom();
439  if (__ext_bom)
440  {
441  size_t __size = __from_end - __from;
442  extern_type* __cfixed = static_cast<extern_type*>
443  (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
444  __cfixed[0] = static_cast<extern_type>(__ext_bom);
445  char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
446  __cfrom = reinterpret_cast<char*>(__cfixed);
447  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
448  &__flen, &__cto, &__tlen);
449  }
450  else
451  {
452  extern_type* __cfixed = const_cast<extern_type*>(__from);
453  __cfrom = reinterpret_cast<char*>(__cfixed);
454  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
455  &__flen, &__cto, &__tlen);
456  }
457 
458 
459  if (__conv != size_t(-1))
460  {
461  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
462  __to_next = reinterpret_cast<intern_type*>(__cto);
463  __ret = codecvt_base::ok;
464  }
465  else
466  {
467  if (__flen < static_cast<size_t>(__from_end - __from))
468  {
469  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
470  __to_next = reinterpret_cast<intern_type*>(__cto);
471  __ret = codecvt_base::partial;
472  }
473  else
474  __ret = codecvt_base::error;
475  }
476  }
477  return __ret;
478  }
479 
480  template<typename _InternT, typename _ExternT>
481  int
482  codecvt<_InternT, _ExternT, encoding_state>::
483  do_encoding() const throw()
484  {
485  int __ret = 0;
486  if (sizeof(_ExternT) <= sizeof(_InternT))
487  __ret = sizeof(_InternT) / sizeof(_ExternT);
488  return __ret;
489  }
490 
491  template<typename _InternT, typename _ExternT>
492  bool
493  codecvt<_InternT, _ExternT, encoding_state>::
494  do_always_noconv() const throw()
495  { return false; }
496 
497  template<typename _InternT, typename _ExternT>
498  int
499  codecvt<_InternT, _ExternT, encoding_state>::
500  do_length(state_type&, const extern_type* __from,
501  const extern_type* __end, size_t __max) const
502  { return std::min(__max, static_cast<size_t>(__end - __from)); }
503 
504  // _GLIBCXX_RESOLVE_LIB_DEFECTS
505  // 74. Garbled text for codecvt::do_max_length
506  template<typename _InternT, typename _ExternT>
507  int
508  codecvt<_InternT, _ExternT, encoding_state>::
509  do_max_length() const throw()
510  { return 1; }
511 
512 _GLIBCXX_END_NAMESPACE_VERSION
513 } // namespace
514 
515 #endif
_GLIBCXX14_CONSTEXPR const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:195
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Extension to use iconv for dealing with character encodings.
Facet ID class.The ID class provides facets with an index used to identify them. Every facet class mu...
Common base for codecvt functions.
Definition: codecvt.h:68
Class representing stream positions.
Definition: postypes.h:112
const _CharT * c_str() const noexcept
Return const pointer to null-terminated contents.
Basis for explicit traits specializations.
Definition: char_traits.h:269
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
Primary class template codecvt.NB: Generic, mostly useless implementation.
Definition: codecvt.h:274