/*
Eteria IRC Client, an RFC 1459 compliant client program written in Java.
Copyright (C) 2000-2001 Javier Kohen <jkohen at tough.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package ar.com.jkohen.irc;
import java.io.ByteArrayOutputStream;
import java.io.CharConversionException;
import java.io.UnsupportedEncodingException;
import java.util.StringTokenizer;
/**
 * Converts between raw byte buffers and Java strings using a configurable
 * pair of charsets (one for decoding, one for encoding).
 *
 * Besides regular charset names, three special names are understood:
 * <ul>
 * <li><code>"IRC"</code>: decode as UTF-8 when the bytes form valid UTF-8,
 *     otherwise as ISO-8859-1; encode as ISO-8859-1 when every character
 *     fits, otherwise as UTF-8.</li>
 * <li><code>"IRC-mixed"</code>: same as <code>"IRC"</code>, but the decision
 *     is taken separately for every token delimited by spaces or commas.</li>
 * <li><code>""</code> (empty): use the platform default charset.</li>
 * </ul>
 */
public class CharsConverter
{
    /** Special charset name: per-token UTF-8 / Latin-1 detection. */
    private static final String MIXED = "IRC-mixed";
    /** Special charset name: per-message UTF-8 / Latin-1 detection. */
    private static final String AUTO = "IRC";
    /** Charset name used for the Latin-1 side of the detection. */
    private static final String LATIN1 = "ISO_8859-1";
    /** Charset name used for the UTF-8 side of the detection. */
    private static final String UTF8 = "UTF8";

    private String decoding;
    private String encoding;

    /**
     * Creates a converter.
     *
     * @param dec charset name used by {@link #decode}, or one of the special names.
     * @param enc charset name used by {@link #encode}, or one of the special names.
     * @throws UnsupportedEncodingException if a regular charset name is not supported.
     */
    public CharsConverter(String dec, String enc) throws UnsupportedEncodingException
    {
        /* Building a String from an empty array throws if the charset name is not supported. */
        byte arr[] = {};
        if (!isSpecialName(dec))
            new String(arr, dec);
        /* Each name is checked against itself; the encoder name must not be guarded by the decoder name. */
        if (!isSpecialName(enc))
            new String(arr, enc);

        decoding = dec;
        encoding = enc;
    }

    /**
     * Decodes the first <code>num</code> bytes of <code>buffer</code> into a string.
     *
     * @param buffer bytes to decode.
     * @param num number of bytes to use, counted from the start of the buffer.
     * @return the decoded text; the platform default charset is used when
     *         <code>num</code> is not positive or no decoding charset was configured.
     * @throws UnsupportedEncodingException if the configured charset is not supported.
     */
    public String decode(byte buffer[], int num) throws UnsupportedEncodingException
    {
        if (num > 0)
        {
            if (decoding.equalsIgnoreCase(MIXED))
                return(decodeMixed(buffer, num));
            if (decoding.equalsIgnoreCase(AUTO))
                return(decodeAuto(buffer, 0, num)); /* Similar to "IRC-Hybrid" on XChat */
            if (!decoding.equals(""))
                return(new String(buffer, 0, num, decoding));
        }
        return(new String(buffer, 0, num));
    }

    /**
     * Encodes a string into bytes using the configured encoding.
     *
     * @param str text to encode.
     * @return the encoded bytes.
     * @throws UnsupportedEncodingException if the configured charset is not supported.
     */
    public byte[] encode(String str) throws UnsupportedEncodingException
    {
        if (encoding.equalsIgnoreCase(MIXED))
        {
            /* Delimiters are returned as tokens too, so nothing is lost from the message. */
            ByteArrayOutputStream dest = new ByteArrayOutputStream();
            StringTokenizer st = new StringTokenizer(str, " ,", true);
            while (st.hasMoreTokens())
            {
                byte chunk[] = encodeAuto(st.nextToken());
                dest.write(chunk, 0, chunk.length);
            }
            return(dest.toByteArray());
        }
        if (encoding.equalsIgnoreCase(AUTO))
            return(encodeAuto(str));
        if (!encoding.equals(""))
            return(str.getBytes(encoding));
        return(str.getBytes());
    }

    /**
     * Tells whether every byte in <code>[start, end)</code> is outside the
     * range 0x7F-0x9F (DEL and the C1 control codes).
     *
     * @param buffer bytes to inspect.
     * @param start index of the first byte to inspect.
     * @param end index one past the last byte to inspect.
     * @return <code>false</code> as soon as a byte in 0x7F-0x9F is found, <code>true</code> otherwise.
     */
    public boolean isLatin(byte buffer[], int start, int end)
    {
        for (int i = start; i < end; i++)
        {
            int c = buffer[i] & 0xFF;
            if (c >= 0x7F && c <= 0x9F)
                return(false);
        }
        return(true);
    }

    /**
     * Strictly decodes the bytes in <code>[start, end)</code> as UTF-8.
     *
     * Sequences are rejected when they are truncated, contain a byte that is
     * not a continuation byte, use an overlong form, encode a surrogate or
     * encode a value above U+10FFFF. Code points above U+FFFF are returned
     * as a surrogate pair.
     *
     * @param buffer bytes to decode.
     * @param start index of the first byte to decode.
     * @param end index one past the last byte to decode.
     * @return the decoded text.
     * @throws CharConversionException if the bytes are not valid UTF-8.
     */
    public String UTF8ToLatin(byte buffer[], int start, int end) throws CharConversionException
    {
        StringBuffer decoded = new StringBuffer();
        int pos = start;

        while (pos < end)
        {
            int lead = buffer[pos++] & 0xFF;
            if (lead < 0x80)
            {
                decoded.append((char)lead);
                continue;
            }

            int extra;  /* continuation bytes expected after the lead byte */
            int cp;     /* code point being assembled */
            int min;    /* smallest code point allowed for this length (rejects overlong forms) */
            if (lead < 0xC2)
            {
                /* Stray continuation byte, or a lead byte that could only start an overlong form. */
                throw new CharConversionException("UTF-8 not valid at " + pos);
            }
            else if (lead < 0xE0)
            {
                extra = 1;
                cp = lead & 0x1F;
                min = 0x80;
            }
            else if (lead < 0xF0)
            {
                extra = 2;
                cp = lead & 0x0F;
                min = 0x800;
            }
            else if (lead < 0xF5)
            {
                extra = 3;
                cp = lead & 0x07;
                min = 0x10000;
            }
            else
            {
                throw new CharConversionException("UTF-8 not valid at " + pos);
            }

            /* Truncated sequence. */
            if (end - pos < extra)
                throw new CharConversionException("UTF-8 not valid at " + pos);

            /* Every continuation byte is checked on its own: all of them must look like 10xxxxxx. */
            boolean valid = true;
            for (int k = 0; k < extra; k++)
            {
                int cont = buffer[pos++] & 0xFF;
                if ((cont & 0xC0) != 0x80)
                    valid = false;
                cp = (cp << 6) | (cont & 0x3F);
            }

            if (!valid || cp < min || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
                throw new CharConversionException("UTF-8 not valid at " + pos);

            if (cp < 0x10000)
            {
                decoded.append((char)cp);
            }
            else
            {
                /* A char holds only 16 bits, so supplementary code points need a surrogate pair. */
                int offset = cp - 0x10000;
                decoded.append((char)(0xD800 + (offset >> 10)));
                decoded.append((char)(0xDC00 + (offset & 0x3FF)));
            }
        }
        return(decoded.toString());
    }

    /* Tells whether a charset name is handled by this class instead of the runtime. */
    private static boolean isSpecialName(String name)
    {
        return(name.equalsIgnoreCase(MIXED) || name.equalsIgnoreCase(AUTO) || name.equals(""));
    }

    /*
    ** This decoder is similar to "IRC Hybrid"
    ** except that a single message can mix Latin and UTF-8
    ** separated by spaces and commas.
    */
    private String decodeMixed(byte buffer[], int num) throws UnsupportedEncodingException
    {
        StringBuffer str = new StringBuffer();
        int start = 0;
        for (int i = 0; i < num; i++)
        {
            boolean last = (i + 1 == num);
            if (buffer[i] == ' ' || buffer[i] == ',' || last)
            {
                int end = last ? num : i;
                str.append(decodeAuto(buffer, start, end));
                /* The delimiter itself is left for the next token; being ASCII it is valid in both charsets. */
                start = i;
            }
        }
        return(str.toString());
    }

    /* Decodes [start, end) as UTF-8 when valid, falling back to ISO-8859-1 otherwise. */
    private String decodeAuto(byte buffer[], int start, int end) throws UnsupportedEncodingException
    {
        try
        {
            return(UTF8ToLatin(buffer, start, end));
        }
        catch (CharConversionException e)
        {
            return(new String(buffer, start, end - start, LATIN1));
        }
    }

    /* Encodes as ISO-8859-1 when every character fits, as UTF-8 otherwise. */
    private byte[] encodeAuto(String s) throws UnsupportedEncodingException
    {
        if (isLatin(s))
            return(s.getBytes(LATIN1));
        return(s.getBytes(UTF8));
    }

    /* Tells whether the string has no character in 0x7F-0x9F nor above 0xFF. */
    private boolean isLatin(String str)
    {
        for (int i = 0; i < str.length(); i++)
        {
            char c = str.charAt(i);
            if ((c >= 0x7F && c <= 0x9F) || c > 0xFF)
                return(false);
        }
        return(true);
    }
}