Windows Phone SDK 不支援 BIG5 編碼,它只支援下列三種編碼:
BigEndianUnicode、Unicode、UTF8
因此若要讀取 BIG5 的網頁,就必須自行將 BIG5 轉換為 Unicode,實作的主要重點在於:
- 取得 BIG5 → Unicode 轉換表(BIG5.TXT)。
- 將轉換表改用 Dictionary 型態儲存。
- 讀取網頁時,用 stream,不要用 WebClient!
使用 WebClient 來讀取網頁,得到的並非 raw data,而是已經被預設編碼(default encoding)轉換過的資料。更進一步解釋,就是把 BIG5 的資料套用了 UTF8 → Unicode 的轉換,這會導致資料整個變成無法使用的亂碼。
實作
將 BIG5.TXT 加入專案(建置動作 Build Action 設為 Content,程式才能以相對路徑的 Application.GetResourceStream 讀到它)
在 ContentPanel 加入一個 TextBlock(名稱為 textBlock1,程式碼會用這個名稱來顯示結果)
MainPage.xaml.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Animation;
using System.Windows.Shapes;
using Microsoft.Phone.Controls;
using System.IO;
using System.Globalization;
using System.Diagnostics;
using System.Text;
namespace PhoneApp2
{
/// <summary>
/// Sample page that downloads a Big5-encoded web page as raw bytes, converts it
/// to Unicode text with a lookup table loaded from BIG5.TXT, and shows the
/// result in <c>textBlock1</c>.
/// </summary>
public partial class MainPage : PhoneApplicationPage
{
    // Address of the Big5-encoded page downloaded by this sample.
    private const string FeedUrl = "http://www.businessweekly.com.tw/feednews.php";

    // Big5 code whose mapped character is shown in place of byte pairs that are
    // missing from the table (presumably the full-width question mark in the
    // standard BIG5.TXT -- confirm against the table actually shipped).
    private const int FallbackBig5Code = 0xA148;

    // Inclusive range of byte values treated as the first byte of a two-byte
    // Big5 character. Everything outside this range is emitted as a single char.
    private const int MinLeadByte = 0x81;
    private const int MaxLeadByte = 0xFE;

    // Decoder state meaning "no lead byte is waiting for its trail byte".
    private const int NoPendingLead = -1;

    // Number of bytes requested from the response stream per read.
    private const int ReadChunkSize = 4096;

    // Big5 to Unicode mapping table, keyed by the two-byte Big5 code.
    // It is static, so it is loaded once and then replaced wholesale by a fully
    // built dictionary; readers never observe a half-filled table.
    private static Dictionary<int, int> mBIG5_Unicode_MAP = new Dictionary<int, int>();

    /// <summary>
    /// Constructor: builds the UI, loads the mapping table, then starts the download.
    /// </summary>
    public MainPage()
    {
        InitializeComponent();
        createBig5ToUnicodeDictionary();
        readBig5WebPage();
    }

    /// <summary>
    /// Shows <paramref name="content"/> in the page's text block.
    /// Must be called on the UI thread.
    /// </summary>
    /// <param name="content">Text to display.</param>
    private void setContent(string content)
    {
        textBlock1.Text = content;
    }

    /// <summary>
    /// Loads BIG5.TXT from the application package into the static mapping table.
    /// Each data line is expected to look like <c>0xA140&lt;TAB&gt;0x3000&lt;TAB&gt;# comment</c>.
    /// Comment lines, blank lines and lines without two "0x"-prefixed columns are skipped.
    /// </summary>
    /// <exception cref="FileNotFoundException">BIG5.TXT could not be opened.</exception>
    /// <exception cref="FormatException">A column contains non-hexadecimal text.</exception>
    private void createBig5ToUnicodeDictionary()
    {
        // The table is static: a second page instance must not load it again
        // (the original Add-based loop threw on the duplicate keys).
        if (mBIG5_Unicode_MAP.Count > 0)
        {
            return;
        }

        var resource = Application.GetResourceStream(new Uri("BIG5.TXT", UriKind.Relative));
        if (resource == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException below.
            throw new FileNotFoundException("The Big5 mapping table BIG5.TXT was not found in the application package.");
        }

        Dictionary<int, int> table = new Dictionary<int, int>();
        using (StreamReader reader = new StreamReader(resource.Stream))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                // Skip comments.
                if (line.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                string[] tokens = line.Split(new char[] { '\t' });

                // Skip blank or truncated lines; each column needs "0x" plus digits.
                if (tokens.Length < 2 || tokens[0].Length <= 2 || tokens[1].Length <= 2)
                {
                    continue;
                }

                // Indexer assignment tolerates a repeated Big5 code (last one wins).
                table[hexToInt(tokens[0].Substring(2))] = hexToInt(tokens[1].Substring(2));
            }
        }

        // Publish only after the whole file parsed successfully.
        mBIG5_Unicode_MAP = table;
    }

    /// <summary>
    /// Shows a "loading" message and starts an asynchronous request for the page.
    /// The response is handled in <see cref="ResponseCallback"/>.
    /// </summary>
    private void readBig5WebPage()
    {
        textBlock1.Text = "讀取中...";
        WebRequest request = WebRequest.Create(FeedUrl);
        request.BeginGetResponse(ResponseCallback, request);
    }

    /// <summary>
    /// Completion callback for the web request: decodes the raw response bytes and
    /// hands the resulting text (or an error message) to the UI thread.
    /// </summary>
    /// <param name="result">Async result whose state is the originating request.</param>
    private void ResponseCallback(IAsyncResult result)
    {
        string text;
        try
        {
            WebRequest request = (WebRequest)result.AsyncState;

            // Dispose the response and its stream once decoding is finished.
            using (WebResponse response = request.EndGetResponse(result))
            using (Stream body = response.GetResponseStream())
            {
                text = big5ToUnicode(body).ToString();
            }
        }
        catch (WebException ex)
        {
            // This runs off the UI thread; an unhandled network failure here
            // would take the application down, so report it instead.
            text = "讀取失敗: " + ex.Message;
        }
        catch (IOException ex)
        {
            text = "讀取失敗: " + ex.Message;
        }

        // UI elements may only be touched from the UI thread.
        Dispatcher.BeginInvoke(() => setContent(text));
    }

    /// <summary>
    /// Decodes a Big5 byte stream into text using the loaded mapping table.
    /// Bytes in the lead-byte range start a two-byte character; the following byte
    /// completes it. All other bytes are emitted unchanged as single characters.
    /// Byte pairs missing from the table, and a lead byte left dangling at the end
    /// of the stream, are replaced by a fallback character.
    /// </summary>
    /// <param name="s">Readable stream of Big5-encoded bytes.</param>
    /// <returns>The decoded text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    private StringBuilder big5ToUnicode(Stream s)
    {
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }

        // Work on one table reference for the whole decode.
        Dictionary<int, int> table = mBIG5_Unicode_MAP;
        char unknown = fallbackCharFrom(table);

        StringBuilder text = new StringBuilder();
        byte[] chunk = new byte[ReadChunkSize];
        int pendingLead = NoPendingLead;
        int count;

        // Read in chunks rather than one ReadByte call per byte.
        while ((count = s.Read(chunk, 0, chunk.Length)) > 0)
        {
            for (int i = 0; i < count; i++)
            {
                int current = chunk[i];

                if (pendingLead != NoPendingLead)
                {
                    // Second byte of a pair: look the combined code up without
                    // relying on exceptions for unmapped codes.
                    int mapped;
                    int big5Code = (pendingLead << 8) | current;
                    text.Append(table.TryGetValue(big5Code, out mapped) ? (char)mapped : unknown);
                    pendingLead = NoPendingLead;
                }
                else if (current >= MinLeadByte && current <= MaxLeadByte)
                {
                    // First byte of a pair: remember it until the next byte arrives.
                    pendingLead = current;
                }
                else
                {
                    // Single-byte character: emitted with the same code value.
                    text.Append((char)current);
                }
            }
        }

        // The stream ended in the middle of a two-byte character.
        if (pendingLead != NoPendingLead)
        {
            text.Append(unknown);
        }

        return text;
    }

    /// <summary>
    /// Returns the character used for undecodable input: the table entry for
    /// <see cref="FallbackBig5Code"/>, or '?' when the table has no such entry.
    /// </summary>
    /// <param name="table">Mapping table to consult.</param>
    private static char fallbackCharFrom(Dictionary<int, int> table)
    {
        int mapped;
        return table.TryGetValue(FallbackBig5Code, out mapped) ? (char)mapped : '?';
    }

    /// <summary>
    /// Parses a hexadecimal string (without a "0x" prefix) as an integer.
    /// </summary>
    /// <param name="hexString">Hexadecimal digits.</param>
    /// <returns>The parsed value.</returns>
    /// <exception cref="FormatException">The text is not valid hexadecimal.</exception>
    private int hexToInt(string hexString)
    {
        // Invariant culture: the table is machine-readable data, not user input.
        return int.Parse(hexString, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
    }
}
}
參考連結