Skip to content

Instantly share code, notes, and snippets.

@lehrblogger
Last active December 17, 2015 20:29
Show Gist options
  • Select an option

  • Save lehrblogger/5668256 to your computer and use it in GitHub Desktop.

Select an option

Save lehrblogger/5668256 to your computer and use it in GitHub Desktop.
unicode troubles in ejabberd_xmlrpc
Hi,
I'm having trouble using unicode characters in roster item nicknames set
from Python using ejabberd_xmlrpc. Those nicknames work fine if I set them
from the command line using ejabberdctl add_rosteritem, but if I
use add_rosteritem via xmlrpc, then the command returns as if it
succeeded... but the roster item is somehow corrupt. ejabberd throws errors
if I try to view the data using get_roster or the web admin interface, and
the user with the corrupt roster is also unable to log in.
I've tested this pretty thoroughly and documented it in the comments here:
https://gist.github.com/lehrblogger/5668256/ - I'm happy to format a
version to send to the list, but I'm not sure it would be legible and I
wanted to keep this short. I'm using the 2.1.x branch of both
http://github.com/processone/ejabberd and
http://github.com/processone/ejabberd-contrib with a nearly-default
ejabberd.cfg file on a fresh VM.
I know *much* more about character encoding now than I did this afternoon,
but it's still possible I'm making a simple mistake somewhere. But, if not,
could there be a bug in the ejabberd_xmlrpc module? I've looked at the
source, but haven't had any success figuring out what to patch.
Thanks!
Best,
Steven
@lehrblogger
Copy link
Author

And my attempt to modify the xmlrpc library itself, in case I don't keep my fork of it:

$ git remote -v
origin  git@github.com:lehrblogger/xmlrpc.git (fetch)
origin  git@github.com:lehrblogger/xmlrpc.git (push)
$ git log -p -n 2
commit d9b027dd7df4230cd840dbde8cdeec5b959b3a9a
Author: Steven Lehrburger <>
Date:   Thu May 30 02:28:39 2013 -0400

    Fixing errors with unicode

diff --git a/src/xmlrpc_encode.erl b/src/xmlrpc_encode.erl
index 6535545..df4a41c 100644
--- a/src/xmlrpc_encode.erl
+++ b/src/xmlrpc_encode.erl
@@ -109,14 +109,14 @@ encode({base64, Base64}) ->
 %    end;
     ["<base64>", Base64, "</base64>"];
 encode(Unicode) when is_list(Unicode) ->
-    case io_lib:printable_unicode_list(Unicode) of
-        true -> 
-            Binary = binary_to_list(unicode:characters_to_binary(Unicode)),
-            case xmlrpc_util:is_string(Binary) of         
-                yes -> ["<string>", escape_string(Binary), "</string>"];
-                no -> {error, {bad_unicode_value, Binary}}   
-            end;
-        false ->  {error, {bad_unicode_value, Unicode}}
+    Unicode = case io_lib:printable_unicode_list(Unicode) of
+        true ->
+            binary_to_list(unicode:characters_to_binary(Unicode));
+        false -> Unicode
+    end,
+    case xmlrpc_util:is_string(Unicode) of       
+        yes -> ["<string>", escape_string(Unicode), "</string>"];
+        no -> {error, {binary_bad_unicode_value, Unicode}}   
     end;
 encode(Value) ->
     case xmlrpc_util:is_string(Value) of

commit 30150e712d48e8063af7db63cba48fd0864a51cb
Author: Steven Lehrburger <>
Date:   Thu May 30 01:22:44 2013 -0400

    Properly encode lists of unicode code points

diff --git a/src/xmlrpc_encode.erl b/src/xmlrpc_encode.erl
index 636cf15..6535545 100644
--- a/src/xmlrpc_encode.erl
+++ b/src/xmlrpc_encode.erl
@@ -108,6 +108,16 @@ encode({base64, Base64}) ->
 %      no -> {error, {bad_base64, Base64}}
 %    end;
     ["<base64>", Base64, "</base64>"];
+encode(Unicode) when is_list(Unicode) ->
+    case io_lib:printable_unicode_list(Unicode) of
+        true -> 
+            Binary = binary_to_list(unicode:characters_to_binary(Unicode)),
+            case xmlrpc_util:is_string(Binary) of         
+                yes -> ["<string>", escape_string(Binary), "</string>"];
+                no -> {error, {bad_unicode_value, Binary}}   
+            end;
+        false ->  {error, {bad_unicode_value, Unicode}}
+    end;
 encode(Value) ->
     case xmlrpc_util:is_string(Value) of
        yes -> ["<string>", escape_string(Value), "</string>"];

dashdash-chat/xmlrpc@d9b027d
dashdash-chat/xmlrpc@30150e7

@lehrblogger
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment