当前位置:编程学习 > JAVA >>

Thrift 中以GBK传输中文字符和分词服务搭建

    项目中需要将分词做成线上服务:服务后端用 C++ 实现,客户端用 Java 实现并调用该分词服务。分词程序默认以 GBK 编码处理文本,而 Thrift 的 Java 库在读写字符串时固定使用 UTF-8 编码,因此需要让字符串在传输时以 GBK 编码收发。Thrift 的 Java 库不支持以 GBK 方式传输字符串,而且预期不会增加这一功能(参见 Thrift 的相关讨论“Support non-UTF-8 in Java and C#”和“Support non-UTF-8 in Java”);C/C++ 中传输的字符串只是字节序列,不存在编码问题。阅读 TProtocol 相关代码后发现,只需重写 readString 和 writeString 中字符串的读写编码方式即可。重写后的类的完整实现如下:
 
[java] 
import java.io.UnsupportedEncodingException;  
import java.nio.ByteBuffer;  
import org.apache.thrift.ShortStack;  
import org.apache.thrift.TException;  
import org.apache.thrift.protocol.*;  
import org.apache.thrift.transport.TTransport;  
  
public class GBKCompactProtocol extends TProtocol {  
    // Struct descriptor with an empty name; not referenced in the visible part of this class.
    private static final TStruct ANONYMOUS_STRUCT = new TStruct("");  
    // Field descriptor with an empty name, type 0 and id 0.
    // NOTE(review): presumably the "stop" marker returned by the read side -- confirm.
    private static final TField TSTOP = new TField("", (byte) 0, (short) 0);  
  
    // 16-entry byte table; it is filled in outside the visible code.
    // NOTE(review): presumably backs getCompactType(...) -- confirm.
    private static final byte[] ttypeToCompactType = new byte[16];  
    // Same value (-126, i.e. 0x82 as an unsigned byte) that writeMessageBegin emits first.
    private static final byte PROTOCOL_ID = -126;  
    // Same value (1) that writeMessageBegin ORs into the low bits of the second header byte.
    private static final byte VERSION = 1;  
    // 31 == 0x1F: selects the low five bits of a byte.
    private static final byte VERSION_MASK = 31;  
    // -32 == 0xE0 as an unsigned byte: selects the high three bits of a byte.
    private static final byte TYPE_MASK = -32;  
    // Left shift writeMessageBegin applies to the message type before masking.
    private static final int TYPE_SHIFT_AMOUNT = 5;  
    // Stack of saved field ids: pushed in writeStructBegin, popped in writeStructEnd.
    private ShortStack lastField_ = new ShortStack(15);  
  
    // Id of the most recently written field; used to compute the id delta for the next field.
    private short lastFieldId_ = 0;  
  
    // Boolean field whose header is held back by writeFieldBegin until writeBool supplies the value.
    private TField booleanField_ = null;  
  
    // Not used in the visible code.
    // NOTE(review): presumably the read-side counterpart of booleanField_ -- confirm.
    private Boolean boolValue_ = null;  
  
    // Scratch buffers; their users are outside the visible code.
    // NOTE(review): sizes suggest varint encoding of 32-bit (5 bytes) and 64-bit (10 bytes) values -- confirm.
    byte[] i32buf = new byte[5];  
  
    byte[] varint64out = new byte[10];  
  
    // One-byte scratch buffers; their users are outside the visible code.
    private byte[] byteDirectBuffer = new byte[1];  
  
    byte[] byteRawBuf = new byte[1];  
  
    public GBKCompactProtocol(TTransport transport) {  
        super(transport);  
    }  
  
    public void reset() {  
        this.lastField_.clear();  
        this.lastFieldId_ = 0;  
    }  
  
    public void writeMessageBegin(TMessage message)  
            throws TException {  
        writeByteDirect((byte) -126);  
        writeByteDirect(0x1 | message.type << 5 & 0xFFFFFFE0);  
        writeVarint32(message.seqid);  
        writeString(message.name);  
    }  
  
    public void writeStructBegin(TStruct struct)  
            throws TException {  
        this.lastField_.push(this.lastFieldId_);  
        this.lastFieldId_ = 0;  
    }  
  
    public void writeStructEnd()  
            throws TException {  
        this.lastFieldId_ = this.lastField_.pop();  
    }  
  
    public void writeFieldBegin(TField field)  
            throws TException {  
        if (field.type == 2) {  
            this.booleanField_ = field;  
        } else writeFieldBeginInternal(field, (byte) -1);  
    }  
  
    private void writeFieldBeginInternal(TField field, byte typeOverride)  
            throws TException {  
        byte typeToWrite = typeOverride == -1 ? getCompactType(field.type) : typeOverride;  
  
        if ((field.id > this.lastFieldId_) && (field.id - this.lastFieldId_ <= 15)) {  
            writeByteDirect(field.id - this.lastFieldId_ << 4 | typeToWrite);  
        } else {  
            writeByteDirect(typeToWrite);  
            writeI16(field.id);  
        }  
  
        this.lastFieldId_ = field.id;  
    }  
  
    public void writeFieldStop()  
            throws TException {  
        writeByteDirect((byte) 0);  
    }  
  
    public void writeMapBegin(TMap map)  
            throws TException {  
        if (map.size == 0) {  
            writeByteDirect(0);  
        } else {  
            writeVarint32(map.size);  
            writeByteDirect(getCompactType(map.keyType) << 4 | getCompactType(map.valueType));  
        }  
    }  
  
    public void writeListBegin(TList list)  
            throws TException {  
        writeCollectionBegin(list.elemType, list.size);  
    }  
  
    public void writeSetBegin(TSet set)  
            throws TException {  
        writeCollectionBegin(set.elemType, set.size);  
    }  
  
    public void writeBool(boolean b)  
            throws TException {  
        if (this.booleanField_ != null) {  
            writeFieldBeginInternal(this.booleanField_, (byte) (b ? 1 : 2));  
            this.booleanField_ = null;  
        } else {  
            writeByteDirect((byte) (b ? 1 : 2));  
        }  
    }  
  
    public void writeByte(byte b)  
            t
补充:软件开发 , Java ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,