Java源码示例:com.ibm.icu.text.Normalizer2
示例1
/**
 * Creates a collation builder that tailors the given base.
 *
 * @param b the base tailoring; its {@code data} becomes this builder's base data
 */
public CollationBuilder(CollationTailoring b) {
  // Normalization helpers.
  this.nfd = Normalizer2.getNFDInstance();
  this.fcd = Norm2AllModes.getFCDNormalizer2();
  this.nfcImpl = Norm2AllModes.getNFCInstance().impl;

  // Base tailoring and the data derived from it.
  this.base = b;
  this.baseData = b.data;
  this.rootElements = new CollationRootElements(this.baseData.rootElements);

  // Fresh builder state.
  this.variableTop = 0;
  this.dataBuilder = new CollationDataBuilder();
  this.fastLatinEnabled = true;
  this.cesLength = 0;
  this.rootPrimaryIndexes = new UVector32();
  this.nodes = new UVector64();

  // One-time preparation of the collaborators created above.
  this.nfcImpl.ensureCanonIterData();
  this.dataBuilder.initForTailoring(this.baseData);
}
示例2
/**
 * Creates a new ICUFoldingFilterFactory.
 *
 * <p>Starts from {@code ICUFoldingFilter.NORMALIZER}. When a non-empty "filter"
 * pattern is supplied, the normalizer is wrapped so that it is restricted to the
 * (frozen) {@link UnicodeSet} built from that pattern.
 *
 * @param args factory arguments; presumably consumed by {@code get}, so anything
 *     left over afterwards is rejected as unknown
 * @throws IllegalArgumentException if {@code args} is not empty after the known
 *     parameters have been read
 */
public ICUFoldingFilterFactory(Map<String,String> args) {
  super(args);
  Normalizer2 selected = ICUFoldingFilter.NORMALIZER;
  final String pattern = get(args, "filter");
  final UnicodeSet restriction = (pattern == null) ? null : new UnicodeSet(pattern);
  if (restriction != null && restriction.isEmpty() == false) {
    restriction.freeze();
    selected = new FilteredNormalizer2(selected, restriction);
  }
  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
  this.normalizer = selected;
}
示例3
/**
 * Creates a new ICUNormalizer2CharFilterFactory.
 *
 * <p>Reads "form" (default "nfkc_cf"), "mode" ("compose" or "decompose",
 * default "compose") and an optional "filter" pattern. A non-empty filter
 * restricts the normalizer to the frozen {@link UnicodeSet} built from it.
 *
 * @param args factory arguments; presumably consumed by {@code get}, so anything
 *     left over afterwards is rejected as unknown
 * @throws IllegalArgumentException if {@code args} is not empty after the known
 *     parameters have been read
 */
public ICUNormalizer2CharFilterFactory(Map<String,String> args) {
  super(args);
  final String form = get(args, "form", "nfkc_cf");
  final String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");

  final Normalizer2.Mode icuMode;
  if ("compose".equals(mode)) {
    icuMode = Normalizer2.Mode.COMPOSE;
  } else {
    icuMode = Normalizer2.Mode.DECOMPOSE;
  }
  Normalizer2 selected = Normalizer2.getInstance(null, form, icuMode);

  final String pattern = get(args, "filter");
  final UnicodeSet restriction = (pattern == null) ? null : new UnicodeSet(pattern);
  if (restriction != null && restriction.isEmpty() == false) {
    restriction.freeze();
    selected = new FilteredNormalizer2(selected, restriction);
  }

  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
  this.normalizer = selected;
}
示例4
/**
 * Creates a new ICUNormalizer2FilterFactory.
 *
 * <p>Reads "form" (default "nfkc_cf"), "mode" ("compose" or "decompose",
 * default "compose") and an optional "filter" pattern. A non-empty filter
 * restricts the normalizer to the frozen {@link UnicodeSet} built from it.
 *
 * @param args factory arguments; presumably consumed by {@code get}, so anything
 *     left over afterwards is rejected as unknown
 * @throws IllegalArgumentException if {@code args} is not empty after the known
 *     parameters have been read
 */
public ICUNormalizer2FilterFactory(Map<String,String> args) {
  super(args);
  final String form = get(args, "form", "nfkc_cf");
  final String mode = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");

  final Normalizer2.Mode icuMode;
  if ("compose".equals(mode)) {
    icuMode = Normalizer2.Mode.COMPOSE;
  } else {
    icuMode = Normalizer2.Mode.DECOMPOSE;
  }
  Normalizer2 selected = Normalizer2.getInstance(null, form, icuMode);

  final String pattern = get(args, "filter");
  final UnicodeSet restriction = (pattern == null) ? null : new UnicodeSet(pattern);
  if (restriction != null && restriction.isEmpty() == false) {
    restriction.freeze();
    selected = new FilteredNormalizer2(selected, restriction);
  }

  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
  this.normalizer = selected;
}
示例5
/**
 * Verifies that requesting the "nfc" data with {@code Mode.DECOMPOSE} decomposes
 * its input: a precomposed e-acute must come out as "e" plus combining acute.
 *
 * <p>The analyzer is managed by try-with-resources so that it is closed even
 * when the assertion fails.
 */
public void testAlternate() throws IOException {
  try (Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
          tokenizer,
          /* specify nfc with decompose to get nfd */
          Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
    }
  }) {
    // decompose EAcute into E + combining Acute
    assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
  }
}
示例6
/**
 * Reads a mixed-script input through {@code ICUNormalizer2CharFilter} in
 * 10-char chunks and checks two things:
 * <ul>
 *   <li>after every chunk, the text produced so far equals the normalization of
 *       the input prefix that {@code correctOffset} maps the output length to;</li>
 *   <li>the complete output equals the normalization of the whole input.</li>
 * </ul>
 *
 * <p>The char filter is closed via try-with-resources, and every
 * {@code assertEquals} passes the expected value first.
 */
public void testNormalization() throws IOException {
  String input = "ʰ㌰゙5℃№㈱㌘,バッファーの正規化のテスト.㋐㋑㋒㋓㋔カキクケコザジズゼゾg̈각/각நிเกषिchkʷक्षि";
  Normalizer2 normalizer = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  String expectedOutput = normalizer.normalize(input);
  char[] tempBuff = new char[10];
  StringBuilder output = new StringBuilder();
  try (CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input), normalizer)) {
    while (true) {
      int length = reader.read(tempBuff);
      if (length == -1) {
        break;
      }
      output.append(tempBuff, 0, length);
      // Input prefix that corresponds to everything emitted so far.
      String consumedInput = input.substring(0, reader.correctOffset(output.length()));
      assertEquals(normalizer.normalize(consumedInput), output.toString());
    }
  }
  assertEquals(expectedOutput, output.toString());
}
示例7
/**
 * Runs a whitespace tokenizer over NFKC-normalized input and checks the
 * resulting terms together with their start and end offsets.
 */
public void testTokenStream() throws IOException {
  // Whitespace-separated compatibility symbols; the last three tokens end in a
  // combining voiced sound mark.
  final String input = "℃ № ㈱ ㌘ ザ ゾ ㌰゙";
  final Normalizer2 nfkc = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
  final CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(input), nfkc);

  final Tokenizer whitespace = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  whitespace.setReader(normalized);

  final String[] expectedTerms = {"°C", "No", "(株)", "グラム", "ザ", "ゾ", "ピゴ"};
  final int[] expectedStarts = {0, 2, 4, 6, 8, 11, 14};
  final int[] expectedEnds = {1, 3, 5, 7, 10, 13, 16};
  assertTokenStreamContents(whitespace, expectedTerms, expectedStarts, expectedEnds, input.length());
}
示例8
/**
 * Runs a unigram tokenizer over nfkc_cf-normalized input and checks each
 * single-character term together with its start and end offsets.
 */
public void testTokenStream2() throws IOException {
  // Same kind of symbols as in testTokenStream, but with no separators and a digit.
  final String input = "㌰゙5℃№㈱㌘ザゾ";
  final Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  final CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(input), nfkcCasefold);

  final Tokenizer unigrams = new NGramTokenizer(newAttributeFactory(), 1, 1);
  unigrams.setReader(normalized);

  final String[] expectedTerms =
      {"ピ", "ゴ", "5", "°", "c", "n", "o", "(", "株", ")", "グ", "ラ", "ム", "ザ", "ゾ"};
  final int[] expectedStarts = {0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9};
  final int[] expectedEnds = {1, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 7, 9, 11};
  assertTokenStreamContents(unigrams, expectedTerms, expectedStarts, expectedEnds, input.length());
}
示例9
/**
 * Checks that the single char U+FDFA, once normalized with nfkc_cf, is split by
 * a whitespace tokenizer into four terms whose offsets all fall inside the
 * one-char input.
 */
public void testMassiveLigature() throws IOException {
  final String input = "\uFDFA";
  final Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  final CharFilter normalized = new ICUNormalizer2CharFilter(new StringReader(input), nfkcCasefold);

  final Tokenizer whitespace = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  whitespace.setReader(normalized);

  final String[] expectedTerms = {"صلى", "الله", "عليه", "وسلم"};
  final int[] expectedStarts = {0, 0, 0, 0};
  final int[] expectedEnds = {0, 0, 0, 1};
  assertTokenStreamContents(whitespace, expectedTerms, expectedStarts, expectedEnds, input.length());
}
示例10
/**
 * Maps the "normalization_mode" setting to an ICU normalization mode.
 *
 * @param settings settings to read "normalization_mode" from ("compose" is used
 *     as the lookup default)
 * @return {@code COMPOSE_CONTIGUOUS}, {@code DECOMPOSE} or {@code FCD} for the
 *     values "compose_contiguous", "decompose" and "fcd"; {@code COMPOSE} for
 *     any other value
 */
protected Normalizer2.Mode getNormalizationMode(Settings settings) {
  final String configured = settings.get("normalization_mode", "compose");
  switch (configured) {
    case "compose_contiguous":
      return Normalizer2.Mode.COMPOSE_CONTIGUOUS;
    case "decompose":
      return Normalizer2.Mode.DECOMPOSE;
    case "fcd":
      return Normalizer2.Mode.FCD;
    default:
      return Normalizer2.Mode.COMPOSE;
  }
}
示例11
/**
 * Maps the "normalization_mode" setting to an ICU normalization mode.
 *
 * @param settings settings to read "normalization_mode" from ("compose" is used
 *     as the lookup default)
 * @return {@code COMPOSE_CONTIGUOUS}, {@code DECOMPOSE} or {@code FCD} for the
 *     values "compose_contiguous", "decompose" and "fcd"; {@code COMPOSE} for
 *     any other value
 */
protected Normalizer2.Mode getNormalizationMode(Settings settings) {
  final String configured = settings.get("normalization_mode", "compose");
  switch (configured) {
    case "compose_contiguous":
      return Normalizer2.Mode.COMPOSE_CONTIGUOUS;
    case "decompose":
      return Normalizer2.Mode.DECOMPOSE;
    case "fcd":
      return Normalizer2.Mode.FCD;
    default:
      return Normalizer2.Mode.COMPOSE;
  }
}
示例12
/**
 * Builds the char filter factory's normalizer from the given settings.
 *
 * <p>The base normalizer comes from the resource, name and mode helpers. When
 * "unicode_set_filter" is set, the base is wrapped so that it is restricted to
 * the frozen {@link UnicodeSet} parsed from that value.
 *
 * @param indexSettings passed to the superclass
 * @param environment   not used by this constructor
 * @param name          passed to the superclass
 * @param settings      source of the normalization options
 */
public IcuNormalizerCharFilterFactory(IndexSettings indexSettings, Environment environment, String name,
                                      Settings settings) {
  super(indexSettings, name);
  final Normalizer2 unfiltered = Normalizer2.getInstance(
      getNormalizationResource(settings),
      getNormalizationName(settings),
      getNormalizationMode(settings));
  final String pattern = settings.get("unicode_set_filter");
  if (pattern == null) {
    this.normalizer = unfiltered;
  } else {
    this.normalizer = new FilteredNormalizer2(unfiltered, new UnicodeSet(pattern).freeze());
  }
}
示例13
/**
 * Builds the token filter factory's normalizer from the given settings.
 *
 * <p>The base normalizer comes from the resource, name and mode helpers. When
 * "unicode_set_filter" is set, the base is wrapped so that it is restricted to
 * the frozen {@link UnicodeSet} parsed from that value.
 *
 * @param indexSettings passed to the superclass
 * @param environment   not used by this constructor
 * @param name          passed to the superclass
 * @param settings      passed to the superclass and used as the source of the
 *                      normalization options
 */
public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name,
                                       Settings settings) {
  super(indexSettings, name, settings);
  final Normalizer2 unfiltered = Normalizer2.getInstance(
      getNormalizationResource(settings),
      getNormalizationName(settings),
      getNormalizationMode(settings));
  final String pattern = settings.get("unicode_set_filter");
  if (pattern == null) {
    this.normalizer = unfiltered;
  } else {
    this.normalizer = new FilteredNormalizer2(unfiltered, new UnicodeSet(pattern).freeze());
  }
}
示例14
/**
 * Creates an analyzer that tokenizes with {@code IcuTokenizer} and then applies
 * nfkc_cf (compose) normalization to each token.
 *
 * @return a new analyzer instance
 */
private static Analyzer createAnalyzer() {
  return new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new IcuTokenizer(
          AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
          new DefaultIcuTokenizerConfig(false, true));
      final Normalizer2 nfkcCasefold =
          Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
      final TokenFilter normalized = new IcuNormalizerFilter(source, nfkcCasefold);
      return new TokenStreamComponents(source, normalized);
    }
  };
}
示例15
/**
 * Verifies that requesting the "nfc" data with {@code Mode.DECOMPOSE} decomposes
 * its input: a precomposed e-acute must come out as "e" plus combining acute.
 *
 * <p>The analyzer is managed by try-with-resources so that it is closed even
 * when the assertion fails.
 */
public void testAlternate() throws Exception {
  try (Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new IcuNormalizerFilter(
          tokenizer,
          Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
    }
  }) {
    assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
  }
}
示例16
/**
 * Verifies that an empty term passes through the nfkc_cf normalizer filter
 * unchanged when the whole input is kept as a single keyword token.
 *
 * <p>The analyzer is managed by try-with-resources so that it is closed even
 * when the check fails.
 */
public void testEmptyTerm() throws Exception {
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer,
          new IcuNormalizerFilter(tokenizer,
              Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
示例17
/**
 * 21.1.3.12 String.prototype.normalize ( [ form ] )
 * <p>
 * Normalizes the string value of {@code thisValue} with the requested Unicode
 * normalization form. An undefined {@code form} selects "NFC".
 *
 * @param cx
 *            the execution context
 * @param thisValue
 *            the function this-value
 * @param form
 *            the normalization form: "NFC", "NFD", "NFKC" or "NFKD"
 * @return the normalized string
 */
@Function(name = "normalize", arity = 0)
public static Object normalize(ExecutionContext cx, Object thisValue, Object form) {
  /* steps 1-2 */
  final Object target = RequireObjectCoercible(cx, thisValue);
  final String text = ToFlatString(cx, target);
  /* steps 3-4 */
  final String formName;
  if (Type.isUndefined(form)) {
    formName = "NFC";
  } else {
    formName = ToFlatString(cx, form);
  }
  /* step 5 */
  final Normalizer2 normalizer;
  if ("NFC".equals(formName)) {
    normalizer = Normalizer2.getNFCInstance();
  } else if ("NFD".equals(formName)) {
    normalizer = Normalizer2.getNFDInstance();
  } else if ("NFKC".equals(formName)) {
    normalizer = Normalizer2.getNFKCInstance();
  } else if ("NFKD".equals(formName)) {
    normalizer = Normalizer2.getNFKDInstance();
  } else {
    // Any other form name is rejected with a range error.
    throw newRangeError(cx, Messages.Key.InvalidNormalizationForm, formName);
  }
  /* steps 6-7 */
  return ensureValidString(cx, () -> normalizer.normalize(text));
}
示例18
/**
 * Returns the combining class that the NFD normalizer reports for {@code c}.
 *
 * @param c the code point to look up
 * @return the combining class of {@code c}
 */
@Override
int getValue(int c) {
  final Normalizer2 nfd = Normalizer2.getNFDInstance();
  return nfd.getCombiningClass(c);
}
示例19
/**
 * Creates a char filter over {@code in} that uses the given normalizer and a
 * temporary character buffer of the requested size.
 *
 * @param in         the reader to wrap
 * @param normalizer the normalizer to apply; must not be {@code null}
 * @param bufferSize size passed to {@code CharacterUtils.newCharacterBuffer}
 * @throws NullPointerException if {@code normalizer} is {@code null}
 */
ICUNormalizer2CharFilter(Reader in, Normalizer2 normalizer, int bufferSize) {
  super(in);
  // Reject a missing normalizer before storing anything.
  Objects.requireNonNull(normalizer);
  this.normalizer = normalizer;
  this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
}
示例20
/** Runs {@code doTestMode} with the "nfc" data in compose mode. */
public void testNFC() throws Exception {
  final Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
  doTestMode(nfc, 20, RANDOM_MULTIPLIER*1000, 128);
}
示例21
/** Runs {@code doTestMode} with the "nfc" data in compose mode, using the "huge" parameters. */
public void testNFCHuge() throws Exception {
  final Normalizer2 nfc = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
  doTestMode(nfc, 256, RANDOM_MULTIPLIER*500, 16);
}
示例22
/** Runs {@code doTestMode} with the "nfc" data in decompose mode, i.e. NFD. */
public void testNFD() throws Exception {
  final Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
  doTestMode(nfd, 20, RANDOM_MULTIPLIER*1000, 128);
}
示例23
/** Runs {@code doTestMode} with the "nfc" data in decompose mode (NFD), using the "huge" parameters. */
public void testNFDHuge() throws Exception {
  final Normalizer2 nfd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE);
  doTestMode(nfd, 256, RANDOM_MULTIPLIER*500, 16);
}
示例24
/** Runs {@code doTestMode} with the "nfkc" data in compose mode. */
public void testNFKC() throws Exception {
  final Normalizer2 nfkc = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
  doTestMode(nfkc, 20, RANDOM_MULTIPLIER*1000, 128);
}
示例25
/** Runs {@code doTestMode} with the "nfkc" data in compose mode, using the "huge" parameters. */
public void testNFKCHuge() throws Exception {
  final Normalizer2 nfkc = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE);
  doTestMode(nfkc, 256, RANDOM_MULTIPLIER*500, 16);
}
示例26
/** Runs {@code doTestMode} with the "nfkc" data in decompose mode, i.e. NFKD. */
public void testNFKD() throws Exception {
  final Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.DECOMPOSE);
  doTestMode(nfkd, 20, RANDOM_MULTIPLIER*1000, 128);
}
示例27
/** Runs {@code doTestMode} with the "nfkc" data in decompose mode (NFKD), using the "huge" parameters. */
public void testNFKDHuge() throws Exception {
  final Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.DECOMPOSE);
  doTestMode(nfkd, 256, RANDOM_MULTIPLIER*500, 16);
}
示例28
/** Runs {@code doTestMode} with the "nfkc_cf" data in compose mode. */
public void testNFKC_CF() throws Exception {
  final Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  doTestMode(nfkcCasefold, 20, RANDOM_MULTIPLIER*1000, 128);
}
示例29
/** Runs {@code doTestMode} with the "nfkc_cf" data in compose mode, using the "huge" parameters. */
public void testNFKC_CFHuge() throws Exception {
  final Normalizer2 nfkcCasefold = Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  doTestMode(nfkcCasefold, 256, RANDOM_MULTIPLIER*500, 16);
}
示例30
/**
 * Returns the combining class that the NFD normalizer reports for {@code c}.
 *
 * @param c the code point to look up
 * @return the combining class of {@code c}
 */
@Override
int getValue(int c) {
  final Normalizer2 nfd = Normalizer2.getNFDInstance();
  return nfd.getCombiningClass(c);
}