Common problems and questions will be posted here.

Encoding Issues

Sometimes, Windows computers stick a weird unicode object \u2013 at the beginning of files. This makes the function pybel.parser.utils.sanitize_file_lines() have a problem. The solution, when loading a BEL script via pybel.from_path() is to use the encodings keyword argument to specify the right encoding. The default is utf-8 because this is the most common, but when this error happens, set it explicitly to utf_8_sig. More specific documentation is available in the Inputs and Outputs page.


>>> import pybel
>>> graph = pybel.from_path('~/Desktop/small_corpus.bel')

UnicodeDecodeError Traceback (most recent call last) <ipython-input-11-99f2a76596b1> in <module>()
7 ad = pybel.from_pickle(path_2_AD_pickle)
8 else:
----> 9 ad = pybel.from_path(path_2_AD_bel)
10 pybel.to_pickle(ad, path_2_AD_pickle)

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\ in from_path(path, **kwargs)
61'Loading from path: %s', path)
62 with open(os.path.expanduser(path)) as f:
---> 63 return BELGraph(lines=f, **kwargs)

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\ in __init__(self, lines, context, lenient, definition_cache_manager, log_stream, *attrs, **kwargs)
103 if lines is not None:
--> 104 self.parse_lines(lines, context, lenient,
--> definition_cache_manager, log_stream)
106 def parse_lines(self, lines, context=None, lenient=False, definition_cache_manager=None, log_stream=None):

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\ in parse_lines(self, lines, context, lenient, definition_cache_manager, log_stream)
125 self.context = context
--> 127 docs, defs, states =
--> split_file_to_annotations_and_definitions(lines)
129 if isinstance(definition_cache_manager, DefinitionCacheManager):

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\parser\ in split_file_to_annotations_and_definitions(file)
49 def split_file_to_annotations_and_definitions(file):
50 """Enumerates a line iterable and splits into 3 parts"""
---> 51 content = list(sanitize_file_lines(file))
53 end_document_section = 1 + max(j for j, (i, l) in enumerate(content) if l.startswith('SET DOCUMENT'))

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\parser\ in sanitize_file_lines(f)
16 it = iter(it)
---> 18 for line_number, line in it:
19 if line.endswith('\\'):
20 log.log(4, 'Multiline quote starting on line: %d', line_number)

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\parser\ in <genexpr>(.0)
12 def sanitize_file_lines(f):
13 """Enumerates a line iterator and returns the pairs of (line number, line) that are cleaned"""
---> 14 it = (line.strip() for line in f)
15 it = filter(lambda i_l: i_l[1] and not i_l[1].startswith('#'), enumerate(it, start=1))
16 it = iter(it)

C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\encodings\ in decode(self, input, final)
21 class IncrementalDecoder(codecs.IncrementalDecoder):
22 def decode(self, input, final=False):
---> 23 return
---> codecs.charmap_decode(input,self.errors,decoding_table)[0]
25 class StreamWriter(Codec,codecs.StreamWriter):

UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 4668: character maps to <undefined>


>>> import pybel
>>> graph = pybel.from_path('~/Desktop/small_corpus.bel', encoding='utf_8_sig')
>>> # Success!