{"id":1494,"date":"2023-02-13T09:25:14","date_gmt":"2023-02-13T01:25:14","guid":{"rendered":"http:\/\/www.algmain.com\/?page_id=1494"},"modified":"2023-02-13T15:04:46","modified_gmt":"2023-02-13T07:04:46","slug":"clean-resource","status":"publish","type":"page","link":"http:\/\/www.algmain.com\/index.php\/nlp-history\/resource-processing\/clean-resource\/","title":{"rendered":"\u8bed\u6599\u6e05\u6d17"},"content":{"rendered":"\n<p>\u8bed\u6599\u5728\u5bfc\u5165\u5230\u6b63\u5f0f\u8868\u4ee5\u540e\uff0c\u5728\u8fdb\u884c\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e4b\u524d\u9700\u8981\u505a\u9002\u5f53\u7684\u8bed\u6599\u6e05\u6d17\u3002\u6e05\u6d17\u540e\u7684\u8bed\u6599\u5c06\u4f1a\u53d8\u5f97\u66f4\u52a0\u201c\u5e72\u51c0\u201d\u5e76\u6709\u5229\u4e8e\u540e\u671f\u5206\u6790\u3002\u8fd9\u4e9b\u5747\u7531C#\u51fd\u6570\u6765\u5b9e\u73b0\u3002<\/p>\n\n\n\n<p>\u8bed\u6599\u6e05\u6d17\u7684\u603b\u4f53\u76ee\u7684\u6709\u5982\u4e0b\u51e0\u70b9\uff1a<\/p>\n\n\n\n<p>\uff081\uff09\u6e05\u7406\u7a7a\u767d\u5b57\u7b26<br>\u6e05\u7406\u7a7a\u767d\u5b57\u7b26\u7684\u76ee\u7684\u5c31\u662f\uff1a\u5c06\u4e0d\u53ef\u89c1\u5b57\u7b26\u6216\u7a7a\u683c\u5b57\u7b26\u66f4\u6362\u6210\u5355\u4e00\u7684\u7a7a\u683c\u5b57\u7b26\u3002<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n    public static bool IsInvisible(char cValue)\n    {\n        \/\/ Unicode\u4e0d\u53ef\u89c1\u533a\u57df\n        switch ((int)cValue)\n        {\n            case 0x1680:\n            case 0x180E:\n            case 0x2028:\n            case 0x2029:\n            case 0x202F:\n            case 0x205F:\n            case 0x2060:\n            case 0x3000:\n            case 0xFEFF:\n                return true;\n        }\n        \/\/ Unicode\u4e0d\u53ef\u89c1\u533a\u57df\n        if (cValue &gt;= 0xD7B0 &amp;&amp;\n            cValue &lt;= 0xF8FF) return true;\n        \/\/ Unicode\u4e0d\u53ef\u89c1\u533a\u57df\n        if (cValue &gt;= 
0xFFF0 &amp;&amp;\n            cValue &lt;= 0xFFFF) return true;\n        \/\/ Unicode\u4e0d\u53ef\u89c1\u533a\u57df\n        if (cValue &gt;= 0x2000 &amp;&amp;\n            cValue &lt;= 0x200D) return true;\n        \/\/ \u8fd4\u56de\u7ed3\u679c\n        return cValue &lt; 32 || cValue == 0x7F;\n    }\n<\/pre><\/div>\n\n\n<p>\u6ce8\u610f\uff1a\u4e0d\u8981\u968f\u610f\u5220\u9664\u7a7a\u683c\u548c\u4e0d\u53ef\u89c1\u5b57\u7b26\u3002\u5426\u5219\uff0c\u53ef\u80fd\u4f1a\u5f15\u8d77\u8bed\u6599\u7684\u8bed\u4e49\u53d1\u751f\u660e\u663e\u53d8\u5316\u3002<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n    public static string ClearInvisible(string strValue)\n    {\n        \/\/ \u5c06\u4e0d\u53ef\u89c1\u5b57\u7b26\u66ff\u6362\u6210\u7a7a\u683c\n        return Regex.Replace(strValue, @&quot;(&#x5B;\\x00-\\x1F]|\\x7F|\\u1680|\\u180E|&#x5B;\\u2000-\\u200D]|&#x5B;\\u2028-\\u2029]|\\u202F|&#x5B;\\u205F-\\u2060]|\\u3000|&#x5B;\\uD7B0-\\uF8FF]|\\uFEFF|&#x5B;\\uFFF0-\\uFFFF])+&quot;, &quot; &quot;);\n    }\n<\/pre><\/div>\n\n\n<p>\uff082\uff09\u5c06\u5168\u89d2\u5b57\u7b26\u66f4\u6362\u6210\u534a\u89d2\u5b57\u7b26<br>\u5168\u89d2\u5b57\u7b26\u7edf\u4e00\u81f3\u534a\u89d2\u5b57\u7b26\u4e4b\u540e\uff0c\u66f4\u6709\u5229\u4e8e\u7a0b\u5e8f\u5904\u7406\u3002<br>\u9700\u8981\u6ce8\u610f\u7684\u662f\uff1a\u4e0d\u662f\u6240\u6709\u7684\u7b26\u53f7\u90fd\u80fd\u8f6c\u6362\u6210\u534a\u89d2\u7684\u3002\u4f8b\u5982\uff1a\u5168\u89d2\u7684\u6b63\u53cd\u53cc\u5f15\u53f7\u662f\u6ca1\u6709\u5bf9\u5e94\u7684\u534a\u89d2\u5b57\u7b26\u3002<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n    public static string NarrowConvert(string strValue)\n    {\n        \/\/ \u5168\u89d2\u8f6c\u534a\u89d2\n        \/\/return Strings.StrConv(strValue.Value, VbStrConv.Narrow, 0);\n\n        \/\/ \u521b\u5efa\u5b57\u7b26\u4e32\n        StringBuilder sb = new 
StringBuilder(strValue.Length);\n        \/\/ \u5faa\u73af\u5904\u7406\n        foreach (char cValue in strValue)\n        {\n            \/\/ \u7279\u6b8a\u5904\u7406\n            if (cValue == 12288) sb.Append(&#039; &#039;);\n            \/\/ \u68c0\u67e5\u5b57\u7b26\u8303\u56f4\n            else if (cValue &lt; 65281) sb.Append(cValue);\n            else if (cValue &gt; 65374) sb.Append(cValue);\n            \/\/ \u8f6c\u6362\u6210\u534a\u89d2\n            else sb.Append((char)(cValue - 65248));\n        }\n        \/\/ \u8fd4\u56de\u7ed3\u679c\n        return sb.ToString();\n    }\n\n    public static string WideConvert(string strValue)\n    {\n        \/\/ \u534a\u89d2\u8f6c\u5168\u89d2\n        \/\/return Strings.StrConv(strValue.Value, VbStrConv.Wide, 0);\n\n        \/\/ \u521b\u5efa\u5b57\u7b26\u4e32\n        StringBuilder sb = new StringBuilder(strValue.Length);\n        \/\/ \u5faa\u73af\u5904\u7406\n        foreach (char cValue in strValue)\n        {\n            \/\/ \u7279\u6b8a\u5904\u7406\n            if (cValue == 32) sb.Append((char)12288);\n            \/\/ \u68c0\u67e5\u5b57\u7b26\u8303\u56f4\n            else if (cValue &lt; 33) sb.Append(cValue);\n            else if (cValue &gt; 126) sb.Append(cValue);\n            \/\/ \u8f6c\u6362\u6210\u5168\u89d2\n            else sb.Append((char)(cValue + 65248));\n        }\n        \/\/ \u8fd4\u56de\u7ed3\u679c\n        return sb.ToString();\n    }\n<\/pre><\/div>\n\n\n<p>\uff083\uff09XML\u53cd\u8f6c\u4e49<br>XML\u53cd\u8f6c\u4e49\u4e5f\u5c31\u662f\u5c06\u8f6c\u4e49\u7684\u5b57\u7b26\uff0c\u8fd8\u539f\u6210\u539f\u59cb\u5b57\u7b26\u3002\u4f8b\u5982\uff1a&amp;nbsp; \u5bf9\u5e94\u7740\u7a7a\u683c\u3002\u66f4\u4e3a\u590d\u6742\u4e00\u70b9\u7684\u662f\u201c&amp;#\u201d\u5f00\u5934\u548c\u201c&amp;#x\u201d\u5f00\u5934\u7684\u8f6c\u4e49\u5b57\u7b26\u3002\u9700\u8981\u901a\u8fc7\u83b7\u5f9710\u8fdb\u5236\u6216\u800516\u8fdb\u5236\u6570\u503c\u8fdb\u884c\u8fd8\u539f\u3002<\/p>\n\n\n<div 
class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n    public static string XMLUnescape(string strValue)\n    {\n        \/\/ \u8bb0\u5f55\u65e5\u5fd7\n        \/\/Log.LogMessage(&quot;XML&quot;, &quot;XMLUnescape&quot;, &quot;\u5f00\u59cb\u53cd\u8f6c\u4e49\uff01&quot;);\n\n        \/\/ \u521b\u5efa\u8bcd\u5178\n        Dictionary&lt;string, string&gt; escapes = new Dictionary&lt;string, string&gt;();\n        \/\/ \u5339\u914d\u5faa\u73af\n        foreach (Match item in Regex.Matches(strValue, @&quot;&amp;#&#x5B;0-9|o|O|l]{1,5};&quot;))\n        {\n            \/\/ \u8bb0\u5f55\u65e5\u5fd7\n            \/\/Log.LogMessage(&quot;XML&quot;, &quot;XMLUnescape&quot;, item.Value);\n\n            int value = 0;\n            \/\/ \u83b7\u5f97\u6570\u5b57\u90e8\u5206\n            string strNumber =\n                item.Value.Substring(2, item.Value.Length - 3);\n            \/\/ \u68c0\u67e5\u7ed3\u679c\n            if (strNumber.IndexOfAny(errors) &gt;= 0)\n            {\n                \/\/ \u5c06l\u66ff\u6362\u62101\n                strNumber = strNumber.Replace(&quot;l&quot;, &quot;1&quot;);\n                \/\/ \u5c06o\u66ff\u6362\u62100\n                strNumber = strNumber.Replace(&quot;o&quot;, &quot;0&quot;);\n                \/\/ \u5c06O\u66ff\u6362\u62100\n                strNumber = strNumber.Replace(&quot;O&quot;, &quot;0&quot;);\n            }\n            \/\/ \u5c1d\u8bd5\u89e3\u6790\n            value = System.Convert.ToInt32(strNumber);\n            \/\/ \u52a0\u5165\u8bcd\u5178\n            if (!escapes.ContainsKey(item.Value))\n                escapes.Add(item.Value, new string((char)value, 1));\n        }\n        \/\/ \u5339\u914d\u5faa\u73af\n        foreach (Match item in Regex.Matches(strValue, @&quot;&amp;#&#x5B;x|X](&#x5B;0-9|a-f|A-F|o|O|l]{1,4});&quot;))\n        {\n            \/\/ \u8bb0\u5f55\u65e5\u5fd7\n            \/\/Log.LogMessage(&quot;XML&quot;, &quot;XMLUnescape&quot;, 
item.Value);\n\n            int value = 0;\n            \/\/ \u83b7\u5f97\u6570\u5b57\u90e8\u5206\n            string strNumber =\n                item.Value.Substring(3, item.Value.Length - 4);\n            \/\/ \u68c0\u67e5\u7ed3\u679c\n            if (strNumber.IndexOfAny(errors) &gt;= 0)\n            {\n                \/\/ \u5c06l\u66ff\u6362\u62101\n                strNumber = strNumber.Replace(&quot;l&quot;, &quot;1&quot;);\n                \/\/ \u5c06o\u66ff\u6362\u62100\n                strNumber = strNumber.Replace(&quot;o&quot;, &quot;0&quot;);\n                \/\/ \u5c06O\u66ff\u6362\u62100\n                strNumber = strNumber.Replace(&quot;O&quot;, &quot;0&quot;);\n            }\n            \/\/ \u8f6c\u6362\n            value = System.Convert.ToInt32(strNumber, 16);\n            \/\/ \u52a0\u5165\u8bcd\u5178\n            if (!escapes.ContainsKey(item.Value))\n                escapes.Add(item.Value, new string((char)value, 1));\n        }\n        \/\/ \u5f00\u59cb\u66ff\u6362\n        foreach (KeyValuePair&lt;string, string&gt; kvp in escapes)\n        {\n            \/\/ \u6267\u884c\u66ff\u6362\u64cd\u4f5c\n            strValue = strValue.Replace(kvp.Key, kvp.Value);\n        }\n        \/\/ \u5c06\u5b57\u7b26\u4e32\u8f6c\u4e49\u8fd8\u539f\n        foreach (string&#x5B;] item in ESCAPES)\n        {\n            \/\/ \u6267\u884c\u66ff\u6362\u64cd\u4f5c\n            if (strValue.Contains(item&#x5B;0])) strValue = strValue.Replace(item&#x5B;0], item&#x5B;1]);\n        }\n        \/\/ \u8bb0\u5f55\u65e5\u5fd7\n        \/\/Log.LogMessage(&quot;XML&quot;, &quot;XMLUnescape&quot;, &quot;\u53cd\u8f6c\u4e49\u7ed3\u675f\uff01&quot;);\n        \/\/ \u8fd4\u56de\u7ed3\u679c\n        return strValue;\n    
}\n<\/pre><\/div>\n\n\n<p>\u8003\u8651\u5230\u539f\u59cb\u6570\u636e\u53ef\u80fd\u591a\u6b21\u7ecf\u8fc7HTML\u8f6c\u4e49\uff0c\u56e0\u6b64\u4ee5\u4e0a\u4e09\u4e2a\u6b65\u9aa4\u9700\u8981\u53cd\u590d\u6267\u884c\uff0c\u76f4\u81f3\u5185\u5bb9\u4e0d\u518d\u53d1\u751f\u53d8\u5316\u4e3a\u6b62\u3002<\/p>\n\n\n\n<p>\u6ce8\u610f\uff1a\u4ee3\u7801\u4e4b\u4e2d\u9009\u62e9\u6027\u5730\u5bf9\u5c0f\u5199\u5b57\u6bcdo\u3001\u5927\u5199\u5b57\u6bcdO\uff0c\u5c0f\u5199\u5b57\u6bcdl\u505a\u5904\u7406\uff0c\u662f\u6e90\u4e8e\u5b9e\u9645\u6570\u636e\u7684\u6df7\u4e71\u3002\u5f88\u591a\u7f51\u7edc\u6587\u672c\u6570\u636e\u4f1a\u5c06\u8fd9\u4e09\u4e2a\u5b57\u6bcd\u5f530\u548c1\u3002<\/p>\n\n\n\n<p>\uff084\uff09\u66ff\u6362<br>\u66ff\u6362\u7684\u76ee\u7684\uff1a\u5c31\u662f\u5229\u7528\u6b63\u5219\u5339\u914d\u89c4\u5219\u5c06\u7b26\u5408\u89c4\u5219\u7684\u5b57\u7b26\u4e32\u66ff\u6362\u6210\u5176\u4ed6\u5b57\u7b26\u4e32\u3002\u4e00\u822c\u7528\u4e8e\u6e05\u7406\u591a\u4f59\u7684\u6807\u70b9\u7b26\u53f7\uff0c\u6216\u8005\u4e0d\u7b26\u5408\u89c4\u683c\u7684\u7b26\u53f7\u6807\u8bb0\u3002\u8fd9\u4e9b\u66ff\u6362\u89c4\u5219\u53ef\u4ee5\u5b58\u50a8\u4e8e\u6570\u636e\u8868\u4e2d\u3002\u5728\u7cfb\u7edf\u52a0\u8f7d\u524d\uff0c\u5168\u90e8\u88c5\u5165\u5185\u5b58\u4e4b\u4e2d\uff0c\u4ee5\u52a0\u5feb\u5904\u7406\u901f\u5ea6\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"924\" src=\"http:\/\/www.algmain.com\/wp-content\/uploads\/2023\/02\/v2-09352c5aade8a764c44e9e763116fd0e_1440w-1024x924.webp\" alt=\"\" class=\"wp-image-1495\" srcset=\"http:\/\/www.algmain.com\/wp-content\/uploads\/2023\/02\/v2-09352c5aade8a764c44e9e763116fd0e_1440w-1024x924.webp 1024w, http:\/\/www.algmain.com\/wp-content\/uploads\/2023\/02\/v2-09352c5aade8a764c44e9e763116fd0e_1440w-300x271.webp 300w, http:\/\/www.algmain.com\/wp-content\/uploads\/2023\/02\/v2-09352c5aade8a764c44e9e763116fd0e_1440w-768x693.webp 768w, 
http:\/\/www.algmain.com\/wp-content\/uploads\/2023\/02\/v2-09352c5aade8a764c44e9e763116fd0e_1440w.webp 1128w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><figcaption class=\"wp-element-caption\">\u56fe1 \u66ff\u6362\u89c4\u5219\u8868<\/figcaption><\/figure>\n\n\n\n<p>\u66ff\u6362\u89c4\u5219\u4e5f\u662f\u9700\u8981\u8fdb\u884c\u53cd\u590d\u6267\u884c\uff0c\u76f4\u81f3\u5185\u5bb9\u4e0d\u518d\u53d1\u751f\u6539\u53d8\u4e3a\u6b62\u3002<\/p>\n\n\n\n<p>\u8fd9\u4e9b\u89c4\u5219\u4e5f\u53ef\u4ee5\u76f4\u63a5\u56fa\u5316\u5728\u7a0b\u5e8f\u4e4b\u4e2d\u3002<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n        \/\/ \u8fc7\u6ee4\u89c4\u5219\n        private static readonly string&#x5B;]&#x5B;] FILTER_RULES =\n        {\n            new string&#x5B;] {&quot;(\\\\u0020)\\\\s&quot;, &quot; &quot;},\n\n            new string&#x5B;] {&quot;(&#039;){2,}&quot;, &quot;&#039;&quot; },\n            new string&#x5B;] {&quot;(`){2,}&quot;, &quot;`&quot; },\n            new string&#x5B;] {&quot;(&lt;){2,}&quot;, &quot;&lt;&quot; },\n            new string&#x5B;] {&quot;(&gt;){2,}&quot;, &quot;&gt;&quot; },\n            new string&#x5B;] {&quot;(-){2,}&quot;, &quot;\u2014&quot; },\n            new string&#x5B;] {&quot;(\u3001){2,}&quot;, &quot;\u3001&quot; },\n            new string&#x5B;] {&quot;(\uff5e){2,}&quot;, &quot;\uff5e&quot; },\n            new string&#x5B;] {&quot;(\u2014){2,}&quot;, &quot;\u2014&quot; },\n\n            new string&#x5B;] {&quot;(\u2026){2,}&quot;, &quot;\u2026&quot; },\n            new string&#x5B;] {&quot;(\\\\.){3,}&quot;, &quot;\u2026&quot; },\n\n            new string&#x5B;] {&quot;\uff0c(\uff0c|\uff1a|\\\\s)*\uff0c&quot;, &quot;\uff0c&quot; },\n            new string&#x5B;] {&quot;\uff0c(\uff0c|\uff1a|\\\\s)*\uff1a&quot;, &quot;\uff1a&quot; },\n            new string&#x5B;] {&quot;\uff0c(\uff0c|\uff1a|\\\\s)*\u3002&quot;, &quot;\u3002&quot; },\n            new string&#x5B;] 
{&quot;\uff0c(\uff0c|\uff1a|\\\\s)*\uff1b&quot;, &quot;\uff1b&quot; },\n            new string&#x5B;] {&quot;\uff0c(\uff0c|\uff1a|\\\\s)*\uff1f&quot;, &quot;\uff1f&quot; },\n            new string&#x5B;] {&quot;\uff0c(\uff0c|\uff1a|\\\\s)*\uff01&quot;, &quot;\uff01&quot; },\n\n            new string&#x5B;] {&quot;\uff1a(\uff0c|\uff1a|\\\\s)*\uff0c&quot;, &quot;\uff1a&quot; },\n            new string&#x5B;] {&quot;\uff1a(\uff0c|\uff1a|\\\\s)*\uff1a&quot;, &quot;\uff1a&quot; },\n            new string&#x5B;] {&quot;\uff1a(\uff0c|\uff1a|\\\\s)*\u3002&quot;, &quot;\u3002&quot; },\n            new string&#x5B;] {&quot;\uff1a(\uff0c|\uff1a|\\\\s)*\uff1b&quot;, &quot;\uff1b&quot; },\n            new string&#x5B;] {&quot;\uff1a(\uff0c|\uff1a|\\\\s)*\uff1f&quot;, &quot;\uff1f&quot; },\n            new string&#x5B;] {&quot;\uff1a(\uff0c|\uff1a|\\\\s)*\uff01&quot;, &quot;\uff01&quot; },\n\n            new string&#x5B;] {&quot;\u3002(\uff0c|\uff1a|\u3002|\uff1b|\uff1f|\uff01|\\\\s)+&quot;, &quot;\u3002&quot; },\n            new string&#x5B;] {&quot;\uff1b(\uff0c|\uff1a|\u3002|\uff1b|\uff1f|\uff01|\\\\s)+&quot;, &quot;\uff1b&quot; },\n            new string&#x5B;] {&quot;\uff1f(\uff0c|\uff1a|\u3002|\uff1b|\uff1f|\uff01|\\\\s)+&quot;, &quot;\uff1f&quot; },\n            new string&#x5B;] {&quot;\uff01(\uff0c|\uff1a|\u3002|\uff1b|\uff1f|\uff01|\\\\s)+&quot;, &quot;\uff01&quot; },\n\n            new string&#x5B;] {&quot;&lt;(br|hr|input)((\\\\s|\\\\.)*)\/&gt;&quot;, &quot; &quot; },\n            new string&#x5B;] {&quot;&lt;(img|doc|url|input)((\\\\s|\\\\.)*)&gt;&quot;, &quot; &quot; },\n            new string&#x5B;] {&quot;&lt;&#x5B;a-zA-Z]+\\\\s*&#x5B;^&gt;]*&gt;(.*?)&lt;\/&#x5B;a-zA-Z]+&gt;&quot;, &quot;$1&quot; },\n\n            new string&#x5B;] {&quot;\\\\s(\\\\&lt;|\\\\&gt;|\u3010|\u3011|\u3008|\u3009|\u201c|\u201d|\u2018|\u2019|\u300a|\u300b|\\\\(|\\\\)|\uff08|\uff09|\uff3b|\uff3d|\uff5b|\uff5d|\u2026|\uff5e|\u2014|\u3001|\uff1f|\uff01|\uff1b|\u3002|\uff1a|\uff0c)&quot;, 
&quot;$1&quot; },\n            new string&#x5B;] {&quot;(\\\\&lt;|\\\\&gt;|\u3010|\u3011|\u3008|\u3009|\u201c|\u201d|\u2018|\u2019|\u300a|\u300b|\\\\(|\\\\)|\uff08|\uff09|\uff3b|\uff3d|\uff5b|\uff5d|\u2026|\uff5e|\u2014|\u3001|\uff1f|\uff01|\uff1b|\u3002|\uff1a|\uff0c)\\\\s&quot;, &quot;$1&quot; }\n        };\n\n<\/pre><\/div>\n\n\n<p>\u7ecf\u8fc7\u4ee5\u4e0a\u5904\u7406\uff0c\u539f\u59cb\u8bed\u6599\u7b97\u662f\u521d\u6b65\u6e05\u7406\u5e72\u51c0\uff0c\u53ef\u4ee5\u8fdb\u884c\u4e0b\u4e00\u6b65\u7684\u64cd\u4f5c\u3002<\/p>\n\n\n\n<p>\u6e05\u6d17\u51fd\u6570\u5de5\u4f5c\u6548\u7387\u5e76\u4e0d\u662f\u5f88\u9ad8\u3002\u4e3a\u4e86\u52a0\u5feb\u5904\u7406\u901f\u5ea6\uff0c\u5efa\u8bae\u5c06\u8fc7\u6ee4\u540e\u7684\u6570\u636e\u518d\u53e6\u5b58\u81f3\u4e00\u5f20\u6570\u636e\u8868\u4e2d\u3002\u5176\u4ed6\u540e\u7eed\u7684\u5de5\u4f5c\uff0c\u90fd\u4f9d\u636e\u8fc7\u6ee4\u540e\u7684\u6570\u636e\u8fdb\u884c\u5904\u7406\u3002<\/p>\n\n\n\n<p>\u5728\u6587\u7ae0\u7684\u7ed3\u5c3e\u4ecb\u7ecd\u4e00\u4e0b\u4e24\u4e2aC#\u5e93\uff0c\u540e\u9762\u4f1a\u88ab\u7ecf\u5e38\u4f7f\u7528\u5230\u3002<\/p>\n\n\n\n<p>\uff081\uff09using Microsoft.VisualBasic;<\/p>\n\n\n\n<p>\u8be5\u5e93\u4e3b\u8981\u6d89\u53ca\u7b80\u4f53\u7e41\u4f53\u76f8\u4e92\u8f6c\u6362\uff0c\u534a\u89d2\u548c\u5168\u89d2\u7684\u76f8\u4e92\u8f6c\u6362\u3002<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n    public static string TraditionalConvert(string strValue)\n    {\n        \/\/ \u8f6c\u7e41\u4f53\n        return Strings.StrConv(strValue, VbStrConv.TraditionalChinese, 0);\n    }\n\n    public static string SimplifiedConvert(string strValue)\n    {\n        \/\/ \u8f6c\u7b80\u4f53\n        return Strings.StrConv(strValue, VbStrConv.SimplifiedChinese, 0);\n    }\n<\/pre><\/div>\n\n\n<p>\uff082\uff09using 
System.Text.RegularExpressions;<\/p>\n\n\n\n<p>\u8be5\u5e93\u4e3b\u8981\u6d89\u53ca\u57fa\u4e8e\u6b63\u5219\u8868\u8fbe\u5f0f\u7684\u5339\u914d\u548c\u66ff\u6362\u3002<\/p>\n\n\n<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: csharp; title: ; notranslate\" title=\"\">\n    &#x5B;Microsoft.SqlServer.Server.SqlFunction]\n    public static SqlString RegExMatch(SqlString pattern, SqlString input)\n    {\n        \/\/ \u68c0\u67e5\u53c2\u6570\n        if(input.IsNull || pattern.IsNull) return String.Empty;\n        \/\/ \u8fd4\u56de\u5339\u914d\u7ed3\u679c\n        return Regex.Match(input.Value, pattern.Value, RegexOptions.None).Value;\n    }\n\n    &#x5B;Microsoft.SqlServer.Server.SqlFunction]\n    public static SqlBoolean RegExIsMatch(SqlString pattern, SqlString input)\n    {\n        \/\/ \u68c0\u67e5\u53c2\u6570\n        if (input.IsNull || pattern.IsNull) return SqlBoolean.False;\n        \/\/ \u8fd4\u56de\u5339\u914d\u7ed3\u679c\n        return Regex.IsMatch(input.Value, pattern.Value, RegexOptions.None);\n    }\n\n    &#x5B;Microsoft.SqlServer.Server.SqlFunction]\n    public static SqlInt32 RegExIndex(SqlString pattern, SqlString input)\n    {\n        \/\/ \u68c0\u67e5\u53c2\u6570\n        if (input.IsNull || pattern.IsNull) return -1;\n        \/\/ \u8fd4\u56de\u5339\u914d\u7ed3\u679c\n        return Regex.Match(input.Value, pattern.Value, RegexOptions.None).Index;\n    }\n\n    &#x5B;Microsoft.SqlServer.Server.SqlFunction\n        (DataAccess = DataAccessKind.Read,\n            FillRowMethodName = &quot;RegExSplit_FillRow&quot;,\n            TableDefinition = &quot;SplitValue nvarchar(4000)&quot;)]\n    public static IEnumerable RegExSplit(SqlString pattern, SqlString input)\n    {\n        \/\/ \u68c0\u67e5\u53c2\u6570\n        if (input.IsNull || pattern.IsNull) return null;\n        \/\/ \u8fd4\u56de\u7ed3\u679c\n        return Regex.Split(input.Value, pattern.Value, RegexOptions.None);\n    }\n\n    
&#x5B;Microsoft.SqlServer.Server.SqlFunction]\n    public static SqlString RegExReplace(SqlString pattern, SqlString input, SqlString replacement)\n    {\n        \/\/ \u68c0\u67e5\u53c2\u6570\n        if (input.IsNull || pattern.IsNull) return SqlString.Null;\n        \/\/ \u8fd4\u56de\u5339\u914d\u7ed3\u679c\n        return Regex.Replace(input.Value, pattern.Value, replacement.Value, RegexOptions.None);\n    }\n\n    &#x5B;Microsoft.SqlServer.Server.SqlFunction\n        (DataAccess = DataAccessKind.Read,\n            FillRowMethodName = &quot;RegExMatches_FillRow&quot;,\n            TableDefinition = &quot;MatchValue nvarchar(4000), MatchIndex int, MatchLength int&quot;)]\n    public static IEnumerable RegExMatches(SqlString pattern, SqlString input)\n    {\n        \/\/ \u68c0\u67e5\u53c2\u6570\n        if (input.IsNull || pattern.IsNull) return null;\n        \/\/ \u8fd4\u56de\u7ed3\u679c\n        return Regex.Matches(input.Value, pattern.Value, RegexOptions.None);\n    }\n<\/pre><\/div>\n\n\n<p>\u6ce8\u610f\uff1aC#\u7684\u53c2\u6570\u987a\u5e8f\u548cVB\u7684\u53c2\u6570\u987a\u5e8f\u4e0d\u4e00\u81f4\u3002\u53e6\u5916VB\u7684\u6570\u7ec4\u7d22\u5f15\u4ece1\u8d77\uff0c\u800cC#\u662f\u4ece0\u8d77\u3002<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p><strong>\u77e5\u4e4e\uff1a<\/strong><a rel=\"noreferrer noopener\" href=\"https:\/\/zhuanlan.zhihu.com\/p\/539068221\" target=\"_blank\">\u6211\u7684NLP\uff08\u81ea\u7136\u8bed\u8a00\u5904\u7406\uff09\u5386\u7a0b\uff087\uff09\u2014\u2014\u8bed\u6599\u6e05\u6d17<\/a><\/p>\n<\/blockquote>\n","protected":false},"excerpt":{"rendered":"<p>\u8bed\u6599\u5728\u5bfc\u5165\u5230\u6b63\u5f0f\u8868\u4ee5\u540e\uff0c\u5728\u8fdb\u884c\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4e4b\u524d\u9700\u8981\u505a\u9002\u5f53\u7684\u8bed\u6599\u6e05\u6d17\u3002\u6e05\u6d17\u540e\u7684\u8bed\u6599\u5c06\u4f1a\u53d8\u5f97\u66f4\u52a0\u201c\u5e72\u51c0\u201d\u5e76\u6709\u5229\u4e8e\u540e 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"parent":1485,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-1494","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/pages\/1494"}],"collection":[{"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/comments?post=1494"}],"version-history":[{"count":4,"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/pages\/1494\/revisions"}],"predecessor-version":[{"id":1544,"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/pages\/1494\/revisions\/1544"}],"up":[{"embeddable":true,"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/pages\/1485"}],"wp:attachment":[{"href":"http:\/\/www.algmain.com\/index.php\/wp-json\/wp\/v2\/media?parent=1494"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}