mirror of
https://gitee.com/zlgopen/awtk.git
synced 2024-12-02 03:58:33 +08:00
117 lines
3.9 KiB
C++
117 lines
3.9 KiB
C++
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#ifndef PINYINIME_INCLUDE_DICTLIST_H__
|
|
#define PINYINIME_INCLUDE_DICTLIST_H__
|
|
|
|
#include "file.h"
|
|
#include <stdlib.h>
|
|
#include "./dictdef.h"
|
|
#include "./searchutility.h"
|
|
#include "./spellingtrie.h"
|
|
#include "./utf16char.h"
|
|
|
|
namespace ime_pinyin {
|
|
|
|
class DictList {
|
|
private:
|
|
bool initialized_;
|
|
|
|
const SpellingTrie* spl_trie_;
|
|
|
|
// Number of SingCharItem. The first is blank, because id 0 is invalid.
|
|
unsigned scis_num_;
|
|
char16* scis_hz_;
|
|
SpellingId* scis_splid_;
|
|
|
|
// The large memory block to store the word list.
|
|
char16* buf_;
|
|
|
|
// Starting position of those words whose lengths are i+1, counted in
|
|
// char16
|
|
unsigned start_pos_[kMaxLemmaSize + 1];
|
|
|
|
unsigned start_id_[kMaxLemmaSize + 1];
|
|
|
|
int (*cmp_func_[kMaxLemmaSize])(const void*, const void*);
|
|
|
|
bool alloc_resource(unsigned buf_size, unsigned scim_num);
|
|
|
|
void free_resource();
|
|
|
|
#ifdef ___BUILD_MODEL___
|
|
// Calculate the requsted memory, including the start_pos[] buffer.
|
|
unsigned calculate_size(const LemmaEntry* lemma_arr, unsigned lemma_num);
|
|
|
|
void fill_scis(const SingleCharItem* scis, unsigned scis_num);
|
|
|
|
// Copy the related content to the inner buffer
|
|
// It should be called after calculate_size()
|
|
void fill_list(const LemmaEntry* lemma_arr, unsigned lemma_num);
|
|
|
|
// Find the starting position for the buffer of those 2-character Chinese word
|
|
// whose first character is the given Chinese character.
|
|
char16* find_pos2_startedbyhz(char16 hz_char);
|
|
#endif
|
|
|
|
// Find the starting position for the buffer of those words whose lengths are
|
|
// word_len. The given parameter cmp_func decides how many characters from
|
|
// beginning will be used to compare.
|
|
char16* find_pos_startedbyhzs(const char16 last_hzs[], unsigned word_Len,
|
|
int (*cmp_func)(const void*, const void*));
|
|
|
|
public:
|
|
DictList();
|
|
~DictList();
|
|
|
|
bool save_list(FILE* fp);
|
|
bool load_list(FILE* fp);
|
|
|
|
#ifdef ___BUILD_MODEL___
|
|
// Init the list from the LemmaEntry array.
|
|
// lemma_arr should have been sorted by the hanzi_str, and have been given
|
|
// ids from 1
|
|
bool init_list(const SingleCharItem* scis, unsigned scis_num, const LemmaEntry* lemma_arr,
|
|
unsigned lemma_num);
|
|
#endif
|
|
|
|
// Get the hanzi string for the given id
|
|
uint16 get_lemma_str(LemmaIdType id_hz, char16* str_buf, uint16 str_max);
|
|
|
|
void convert_to_hanzis(char16* str, uint16 str_len);
|
|
|
|
void convert_to_scis_ids(char16* str, uint16 str_len);
|
|
|
|
// last_hzs stores the last n Chinese characters history, its length should be
|
|
// less or equal than kMaxPredictSize.
|
|
// hzs_len specifies the length(<= kMaxPredictSize).
|
|
// predict_buf is used to store the result.
|
|
// buf_len specifies the buffer length.
|
|
// b4_used specifies how many items before predict_buf have been used.
|
|
// Returned value is the number of newly added items.
|
|
unsigned predict(const char16 last_hzs[], uint16 hzs_len, NPredictItem* npre_items,
|
|
unsigned npre_max, unsigned b4_used);
|
|
|
|
// If half_splid is a valid half spelling id, return those full spelling
|
|
// ids which share this half id.
|
|
uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid, uint16* splids, uint16 max_splids);
|
|
|
|
LemmaIdType get_lemma_id(const char16* str, uint16 str_len);
|
|
};
|
|
} // namespace ime_pinyin
|
|
|
|
#endif // PINYINIME_INCLUDE_DICTLIST_H__
|