言語的および文化的特徴を忘れないでください

遅かれ早かれ、プログラムを書く人は誰でも、言語的・文化的な多様性に関連する問題に直面します。C++ で書いている友人の何人かが、これらの問題を解決するために車輪の再発明をしていると知って、非常に驚きました。まだ std::locale が何であるかを知らない人のために、それをどのように扱うか、そしてそれを忘れるとどうなるかを簡単に示したいと思います...

std::locale（ロケール）は、ユーザーの文化的および言語的特徴を考慮に入れることを可能にするオブジェクトです。実際には、これはファセット（facet）と呼ばれる特別なクラスのコンテナです。プログラムは、自然言語に依存する処理を実行する必要があるときにファセットを利用し、そうした処理をロケールのファセットに委ねます。ロケールには任意のカスタムファセットを追加できます。しかし最も興味深いのは、次の標準ファセットです。これらはロケールに実装されており、恒久的または一時的に置き換えることができます。

collate（文字列の比較）
numeric（数値の入出力）
monetary（金額の入出力）
time（日時の入出力）
ctype（文字の分類）
messages（メッセージの選択）

実際には、私たちは気づかないうちに常にファセットを使用しています。標準ライブラリは入出力にロケールを使用します。boost::regex は大文字と小文字の変換などにロケールを使用します。ロケールはプラットフォームによって定義されます。*nix システムのユーザーは、「ru_RU.UTF-8」や「en_US.UTF-8」といった文字列に馴染みがあるでしょう。これらはプラットフォームのロケール名です。プログラムはユーザーのロケールを使用します。ユーザーがロケールを指定していない場合は、「クラシック」（"C"）ロケールが使用されます。

ファセットのローカライズとオーバーライドの使用例

標準のロケールファセットを置き換える手法を試す例を考えてみましょう。通常は入出力ストリームが例として取り上げられますが、ここでは、ロケールに依存するコードを、ロケールが何であるかを知らずに書いた場合に何が起こるかに焦点を当てたいと思います。人気のある Boost.Xpressive ライブラリでロケールを使ってみましょう（boost::regex も使用できますが、xpressive について初めて聞いた人は、これを機に読んでみると役に立つでしょう）。

#include <boost/xpressive/xpressive.hpp>
#include <iostream>
#include <string>

using namespace std;
using namespace boost::xpressive;

/**
 * @brief Case-insensitive search for a Cyrillic word with no locale set up.
 *
 * The two literals are Russian text: the pattern is lowercase and the subject
 * contains the same word in uppercase. As the article assumes, this source
 * file must be saved in the windows-1251 encoding so that every Cyrillic
 * letter is a single char.
 *
 * Prints "icase ok" when the regex engine can fold the case of the Cyrillic
 * letters and "icase fail" when it cannot; which one you get depends on the
 * locale the program runs with.
 *
 * NOTE(review): the Cyrillic literals were lost when this listing was
 * extracted (only "" and " !" survived). They are reconstructed here from the
 * later listing in the article - confirm against the original text.
 */
int main(int argc, char *argv[])
{
    (void)argc; // command-line arguments are not used by this example
    (void)argv;

    sregex xpr = sregex::compile("мир", regex_constants::icase);
    smatch match;
    string str("Привет МИР!");

    if (regex_search(str, match, xpr))
        cout << "icase ok" << endl;
    else
        cout << "icase fail" << endl;
    return 0;
}

プログラムの出力がプラットフォームに大きく依存することに驚くでしょう。さらに、同じプラットフォーム上でも、プログラムが異なる結果を出力することがあります。すべてはロケールの問題です。サンプルファイルのエンコーディングが windows-1251 であると仮定すると、ユーザーロケールのエンコーディングが cp1251 以外であるプラットフォームでは「icase fail」という結果になります。そのようなプラットフォームの最も一般的な例は、mingw（sourceforge からバイナリとしてダウンロードしたもの）+ Windows です。この場合、boost::xpressive のアルゴリズムは、cp1251 コード表の拡張部分のどのコードが文字（アルファベット）であるかを単に知らないのです。その原因は、クラシックロケールの ctype ファセットにあります。正しい ctype ファセットを持つロケールを xpressive に伝えることで、望ましい結果が得られます。最も単純なケースでは、目的のロケールがシステムにインストールされていれば、それをグローバルロケールにするだけで済みます。

//グローバルなローカライズを設定します
std :: locale cp1251_locale （ "ru_RU.CP1251" ） ;
std :: ロケール :: グローバル（ cp1251_locale ） ;

または、使用するロケールを正規表現コンパイラに指定します。

std :: locale cp1251_locale （ "ru_RU.CP1251" ） ;
sregex_compilerコンパイラ;
//使用するローカライズを正規表現コンパイラに伝えます
コンパイラ imbue （ cp1251_locale ） ;
sregex xpr =コンパイラ。 compile （ "world" 、regex_constants :: icase ） ;

これで問題ないように見えますが、ロケール ru_RU.CP1251 がサポートされていないプラットフォームでは、このコードは例外をスローします。良くてもロケール名の指定が誤っているだけ、最悪の場合は必要なロケールがシステムに存在しません。この問題は、独自の ctype ファセットを実装することで解決します（どの文字がアルファベットで、大文字・小文字がどのように変換されるかを xpressive に教えるのは、このファセットです）。

CP1251 エンコーディング用の ctype ファセットの最も簡単な実装と、その使用例：

#include <cstddef>
#include <iostream>
#include <locale>
#include <stdexcept>
#include <string>

#include <boost/xpressive/xpressive.hpp>

using namespace std;
using namespace boost::xpressive;
/**
 * @brief Deliberately simplified std::ctype<char> facet for text encoded in
 *        Windows-1251 (CP1251).
 *
 * ASCII characters (0x00-0x7F) keep the classification and case mapping of the
 * classic "C" locale. The upper half of the code page is classified as:
 *   - 0xE0-0xFF: lowercase letters,
 *   - 0xC0-0xDF: uppercase letters (each exactly 0x20 below its lowercase form),
 *   - 0x80-0xBF: punctuation. This is a simplification kept from the original
 *     example: the few letters CP1251 places in this range are not handled.
 *
 * The classification table is handed to the std::ctype<char> base class
 * because std::ctype<char>::is() is a non-virtual table lookup; declaring an
 * is() member here does not change what callers holding a std::ctype<char>
 * reference observe. The case conversions are ordinary virtual overrides.
 *
 * To keep the example short, do_widen()/do_narrow() are not overridden.
 */
class ctype_cp1251 : public std::ctype<char>
{
public:
    /** @brief Bit mask type enumerating the character classes (alpha, digit, ...). */
    typedef std::ctype<char>::mask mask;

    /**
     * @brief Creates the facet.
     * @param r Initial reference count forwarded to the base facet: with 0 the
     *          locale that holds the facet deletes it; with 1 the caller is
     *          responsible for the facet's lifetime.
     */
    ctype_cp1251(std::size_t r = 0)
        : std::ctype<char>(cp1251_table(), false, r) // false: the base must not delete the table
    {
    }

    ~ctype_cp1251()
    {
    }

protected:
    /**
     * @brief Tells whether character @p c belongs to any class in mask @p m.
     *
     * Reads the CP1251 table directly so the answer does not depend on how the
     * standard library's own is() treats characters above 0x7F.
     */
    virtual bool is(mask m, char c) const
    {
        return (cp1251_table()[static_cast<unsigned char>(c)] & m) != 0;
    }

    /** @brief Converts @p c to uppercase; characters without an uppercase form are returned unchanged. */
    virtual char do_toupper(char c) const
    {
        // Work on the unsigned code so the logic is the same whether plain
        // char is signed or unsigned on the target platform.
        const unsigned char code = static_cast<unsigned char>(c);
        if (code < first_extended)
            return std::ctype<char>::do_toupper(c);
        if (is(std::ctype_base::lower, c))
            return static_cast<char>(code - case_offset);
        return c;
    }

    /**
     * @brief Converts every character in [low, high) to uppercase in place.
     * @return @p high.
     */
    virtual const char *do_toupper(char *low, const char *high) const
    {
        for (; low != high; ++low)
            *low = do_toupper(*low);
        return high;
    }

    /** @brief Converts @p c to lowercase; characters without a lowercase form are returned unchanged. */
    virtual char do_tolower(char c) const
    {
        const unsigned char code = static_cast<unsigned char>(c);
        if (code < first_extended)
            return std::ctype<char>::do_tolower(c);
        if (is(std::ctype_base::upper, c))
            return static_cast<char>(code + case_offset);
        return c;
    }

    /**
     * @brief Converts every character in [low, high) to lowercase in place.
     * @return @p high.
     */
    virtual const char *do_tolower(char *low, const char *high) const
    {
        for (; low != high; ++low)
            *low = do_tolower(*low);
        return high;
    }

private:
    // Copying is prohibited (declared, never defined).
    ctype_cp1251(const ctype_cp1251 &);
    const ctype_cp1251 &operator=(const ctype_cp1251 &);

    enum
    {
        first_extended = 0x80, ///< first code of the non-ASCII half of the code page
        case_offset = 0x20     ///< distance between an uppercase letter and its lowercase form
    };

    /** @brief Fills @p table (table_size entries) with the CP1251 classification masks. */
    static bool fill_table(mask *table)
    {
        const mask *classic = classic_table();
        std::size_t code = 0;
        for (; code < 0x80; ++code) // ASCII: same as the classic locale
            table[code] = classic[code];
        for (; code < 0xC0; ++code) // simplified: everything here is punctuation
            table[code] = std::ctype_base::punct;
        for (; code < 0xE0; ++code) // uppercase Cyrillic letters
            table[code] = static_cast<mask>(std::ctype_base::alpha | std::ctype_base::upper);
        for (; code < 0x100; ++code) // lowercase Cyrillic letters
            table[code] = static_cast<mask>(std::ctype_base::alpha | std::ctype_base::lower);
        return true;
    }

    /**
     * @brief Returns the classification table shared by all instances.
     *
     * The table has static storage duration, so it is valid for as long as the
     * base class keeps the pointer.
     */
    static const mask *cp1251_table()
    {
        static mask table[std::ctype<char>::table_size]; // zero-initialised
        static const bool filled = fill_table(table);    // runs once, on first use
        (void)filled;
        return table;
    }
};
int main （ int argc、 char * argv [ ] ）
{
//ファセットのインスタンスを作成します
ctype < char > * ctype_cp1251_facet = new ctype_cp1251 （） ;
//を使用して、現在のローカライズに基づいて新しいローカライズを作成します
//上記で定義されたファセット。グローバルを定義できます
//説明されたファセットを使用してローカライズし、すべてのクラスと
//関数、彼らはそれを使用します。
ロケールcp1251_locale （ locale （ "" ）、ctype_cp1251_facet ） ;
//特定のローカライズで正規表現コンパイラを作成します
sregex_compilerコンパイラ;
コンパイラ imbue （ cp1251_locale ） ;
sregex xpr =コンパイラ。 compile （ "world" 、regex_constants :: icase ） ;
マッチマッチ;
文字列str （「こんにちは平和！」） ;
if （ regex_search （ str、match、xpr ））
cout << "icase ok" << endl ;
他に
cout << "icase fail" << endl ;
0を返します。
}

これで、プログラムの結果は特定のプラットフォームに依存しなくなります。標準ファセットを再定義するか、新しいファセットを追加することにより、ユーザーの文化的および言語的特性に応じてアルゴリズム/プログラムの動作を制御できます。

std::locale クラスとファセットの使い方の詳しい説明は、Bjarne Stroustrup の著書『プログラミング言語 C++』第 3 版（特別版）の付録にあります。ファセットの構成については、任意の STL マニュアルで確認できます。例えばこちらです。

エンコード変換のタスクは、codecvtファセットを実装することで解決されます。興味深い場合は、次の記事で説明します。

______________________

All Articles

言語的および文化的特徴を忘れないでください

ファセットのローカライズとオーバーライドの使用例

テキストは©SoftCoder.ruによってブログエディターで作成されます。

More articles: