def unicode_truncate(s, length, encoding='utf-8'):
    """Truncate *s* so that its encoded form fits in *length* bytes.

    The string is encoded, the byte sequence is cut to the first
    *length* bytes, and the result is decoded back to text.  A cut that
    lands in the middle of a multi-byte character leaves an incomplete
    byte sequence at the end; it is dropped rather than raising a
    decoding error, so the result never ends in a partial character.

    Args:
        s: The text string to truncate.
        length: Maximum size of the encoded result, in bytes.  It is
            applied as a slice bound (``encoded[:length]``), so a
            negative value removes bytes from the end instead.
        encoding: Name of the codec used to measure and cut the string.

    Returns:
        The longest prefix of *s* whose encoding fits in *length* bytes.
        For example, cutting five Cyrillic letters (2 bytes each in
        UTF-8) to 5 bytes returns the first two letters.
    """
    encoded = s.encode(encoding)[:length]
    # 'ignore' silently discards the bytes of a character that was split
    # by the cut above, instead of raising UnicodeDecodeError.
    return encoded.decode(encoding, 'ignore')
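Here is an example with a Unicode string in which each character takes 2 bytes in UTF-8. Truncating to 5 bytes cuts the third character in half, so its leftover byte is discarded and only the first two characters are returned: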
>>> unicode_truncate(u'абвгд', 5)
u'\u0430\u0431'