From b31fc10ec0f6a8e00cf6753e639a6373c8c76b8e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 18 Aug 2018 12:16:17 +0100 Subject: [PATCH 1/7] Main refactoring of validate_docstrings scripts, so it can be executed for all docstrings --- scripts/validate_docstrings.py | 396 ++++++++++++++++----------------- 1 file changed, 195 insertions(+), 201 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index cdea2d8b83abd..0230df835841e 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -15,7 +15,7 @@ """ import os import sys -import csv +import json import re import functools import collections @@ -44,77 +44,115 @@ PRIVATE_CLASSES = ['NDFrame', 'IndexOpsMixin'] -def _load_obj(obj_name): - for maxsplit in range(1, obj_name.count('.') + 1): - # TODO when py3 only replace by: module, *func_parts = ... - func_name_split = obj_name.rsplit('.', maxsplit) - module = func_name_split[0] - func_parts = func_name_split[1:] - try: - obj = importlib.import_module(module) - except ImportError: - pass - else: - continue +def get_api_items(): + api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') - if 'module' not in locals(): - raise ImportError('No module can be imported ' - 'from "{}"'.format(obj_name)) + previous_line = current_section = current_subsection = '' + position = None + with open(api_fname) as f: + for line in f: + line = line.strip() + if len(line) == len(previous_line): + if set(line) == set('-'): + current_section = previous_line + continue + if set(line) == set('~'): + current_subsection = previous_line + continue - for part in func_parts: - obj = getattr(obj, part) - return obj + if line.startswith('.. currentmodule::'): + current_module = line.replace('.. currentmodule::', '').strip() + continue + if line == '.. autosummary::': + position = 'autosummary' + continue -def _to_original_callable(obj): - while True: - if inspect.isfunction(obj) or inspect.isclass(obj): - f = inspect.getfile(obj) - if f.startswith('<') and f.endswith('>'): - return None - return obj - if inspect.ismethod(obj): - obj = obj.__func__ - elif isinstance(obj, functools.partial): - obj = obj.func - elif isinstance(obj, property): - obj = obj.fget - else: - return None + if position == 'autosummary': + if line == '': + position = 'items' + continue + if position == 'items': + if line == '': + position = None + continue + item = line.strip() + func = importlib.import_module(current_module) + for part in item.split('.'): + func = getattr(func, part) -def _output_header(title, width=80, char='#'): - full_line = char * width - side_len = (width - len(title) - 2) // 2 - adj = '' if len(title) % 2 == 0 else ' ' - title_line = '{side} {title}{adj} {side}'.format(side=char * side_len, - title=title, - adj=adj) + yield ('.'.join([current_module, item]), func, + current_section, current_subsection) - return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format( - full_line=full_line, title_line=title_line) + previous_line = line class Docstring(object): - def __init__(self, method_name, method_obj): - self.method_name = method_name - self.method_obj = method_obj - self.raw_doc = method_obj.__doc__ or '' - self.clean_doc = pydoc.getdoc(self.method_obj) + def __init__(self, name): + self.name = name + obj = self._load_obj(name) + obj = self._to_original_callable(obj) + self.obj = obj + self.raw_doc = obj.__doc__ or '' + self.clean_doc = pydoc.getdoc(obj) self.doc = NumpyDocString(self.clean_doc) def __len__(self): return len(self.raw_doc) + @staticmethod + def _load_obj(obj_name): + for maxsplit in range(1, obj_name.count('.') + 1): + # TODO when py3 only replace by: module, *func_parts = ... + func_name_split = obj_name.rsplit('.', maxsplit) + module = func_name_split[0] + func_parts = func_name_split[1:] + try: + obj = importlib.import_module(module) + except ImportError: + pass + else: + continue + + if 'module' not in locals(): + raise ImportError('No module can be imported ' + 'from "{}"'.format(obj_name)) + + for part in func_parts: + obj = getattr(obj, part) + return obj + + @staticmethod + def _to_original_callable(obj): + while True: + if inspect.isfunction(obj) or inspect.isclass(obj): + f = inspect.getfile(obj) + if f.startswith('<') and f.endswith('>'): + return None + return obj + if inspect.ismethod(obj): + obj = obj.__func__ + elif isinstance(obj, functools.partial): + obj = obj.func + elif isinstance(obj, property): + obj = obj.fget + else: + return None + + @property + def type(self): + return type(self.obj).__name__ + @property def is_function_or_method(self): # TODO(py27): remove ismethod - return (inspect.isfunction(self.method_obj) - or inspect.ismethod(self.method_obj)) + return (inspect.isfunction(self.obj) + or inspect.ismethod(self.obj)) @property def source_file_name(self): - fname = inspect.getsourcefile(self.method_obj) + fname = inspect.getsourcefile(self.obj) if fname: fname = os.path.relpath(fname, BASE_PATH) return fname @@ -122,7 +160,7 @@ def source_file_name(self): @property def source_file_def_line(self): try: - return inspect.getsourcelines(self.method_obj)[-1] + return inspect.getsourcelines(self.obj)[-1] except OSError: pass @@ -184,14 +222,14 @@ def doc_parameters(self): @property def signature_parameters(self): - if inspect.isclass(self.method_obj): - if hasattr(self.method_obj, '_accessors') and ( - self.method_name.split('.')[-1] in - self.method_obj._accessors): + if inspect.isclass(self.obj): + if hasattr(self.obj, '_accessors') and ( + self.name.split('.')[-1] in + self.obj._accessors): # accessor classes have a signature but don't want to show this return tuple() try: - sig = signature(self.method_obj) + sig = signature(self.obj) except (TypeError, ValueError): # Some objects, mainly in C extensions do not support introspection # of the signature @@ -256,7 +294,7 @@ def yields(self): @property def method_source(self): - return inspect.getsource(self.method_obj) + return inspect.getsource(self.obj) @property def first_line_ends_in_dot(self): @@ -266,7 +304,7 @@ def first_line_ends_in_dot(self): @property def deprecated(self): pattern = re.compile('.. deprecated:: ') - return (self.method_name.startswith('pandas.Panel') or + return (self.name.startswith('pandas.Panel') or bool(pattern.search(self.summary)) or bool(pattern.search(self.extended_summary))) @@ -281,121 +319,13 @@ def examples_errors(self): runner = doctest.DocTestRunner(optionflags=flags) context = {'np': numpy, 'pd': pandas} error_msgs = '' - for test in finder.find(self.raw_doc, self.method_name, globs=context): + for test in finder.find(self.raw_doc, self.name, globs=context): f = StringIO() runner.run(test, out=f.write) error_msgs += f.getvalue() return error_msgs -def get_api_items(): - api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') - - previous_line = current_section = current_subsection = '' - position = None - with open(api_fname) as f: - for line in f: - line = line.strip() - if len(line) == len(previous_line): - if set(line) == set('-'): - current_section = previous_line - continue - if set(line) == set('~'): - current_subsection = previous_line - continue - - if line.startswith('.. currentmodule::'): - current_module = line.replace('.. currentmodule::', '').strip() - continue - - if line == '.. autosummary::': - position = 'autosummary' - continue - - if position == 'autosummary': - if line == '': - position = 'items' - continue - - if position == 'items': - if line == '': - position = None - continue - item = line.strip() - func = importlib.import_module(current_module) - for part in item.split('.'): - func = getattr(func, part) - - yield ('.'.join([current_module, item]), func, - current_section, current_subsection) - - previous_line = line - - -def _csv_row(func_name, func_obj, section, subsection, in_api, seen={}): - obj_type = type(func_obj).__name__ - original_callable = _to_original_callable(func_obj) - if original_callable is None: - return [func_name, obj_type] + [''] * 12, '' - else: - doc = Docstring(func_name, original_callable) - key = doc.source_file_name, doc.source_file_def_line - shared_code = seen.get(key, '') - return [func_name, - obj_type, - in_api, - int(doc.deprecated), - section, - subsection, - doc.source_file_name, - doc.source_file_def_line, - doc.github_url, - int(bool(doc.summary)), - int(bool(doc.extended_summary)), - int(doc.correct_parameters), - int(bool(doc.examples)), - shared_code], key - - -def validate_all(): - writer = csv.writer(sys.stdout) - cols = ('Function or method', - 'Type', - 'In API doc', - 'Is deprecated', - 'Section', - 'Subsection', - 'File', - 'Code line', - 'GitHub link', - 'Has summary', - 'Has extended summary', - 'Parameters ok', - 'Has examples', - 'Shared code with') - writer.writerow(cols) - seen = {} - api_items = list(get_api_items()) - for func_name, func, section, subsection in api_items: - row, key = _csv_row(func_name, func, section, subsection, - in_api=1, seen=seen) - seen[key] = func_name - writer.writerow(row) - - api_item_names = set(list(zip(*api_items))[0]) - for class_ in (pandas.Series, pandas.DataFrame, pandas.Panel): - for member in inspect.getmembers(class_): - func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) - if (not member[0].startswith('_') and - func_name not in api_item_names): - func = _load_obj(func_name) - row, key = _csv_row(func_name, func, section='', subsection='', - in_api=0) - writer.writerow(row) - - return 0 - - def validate_one(func_name): """ Validate the docstring for the given func_name @@ -403,18 +333,15 @@ def validate_one(func_name): Parameters ---------- func_name : function - Function whose docstring will be evaluated + Function whose docstring will be evaluated (e.g. pandas.read_csv). Returns ------- - int - The number of errors found in the `func_name` docstring + dict + A dictionary containing all the information obtained from validating + the docstring. """ - func_obj = _load_obj(func_name) - doc = Docstring(func_name, func_obj) - - sys.stderr.write(_output_header('Docstring ({})'.format(func_name))) - sys.stderr.write('{}\n'.format(doc.clean_doc)) + doc = Docstring(func_name) errs = [] wrns = [] @@ -439,7 +366,9 @@ def validate_one(func_name): errs.append('Summary does not start with a capital letter') if doc.summary[-1] != '.': errs.append('Summary does not end with a period') - if (doc.is_function_or_method and + if doc.summary != doc.summary.lstrip(): + errs.append('Summary contains heading whitespaces.') + elif (doc.is_function_or_method and doc.summary.split(' ')[0][-1] == 's'): errs.append('Summary must start with infinitive verb, ' 'not third person (e.g. use "Generate" instead of ' @@ -505,42 +434,107 @@ def validate_one(func_name): if examples_errs: errs.append('Examples do not pass tests') - sys.stderr.write(_output_header('Validation')) - if errs: - sys.stderr.write('Errors found:\n') - for err in errs: - sys.stderr.write('\t{}\n'.format(err)) - if wrns: - sys.stderr.write('Warnings found:\n') - for wrn in wrns: - sys.stderr.write('\t{}\n'.format(wrn)) + return {'type': doc.type, + 'docstring': doc.clean_doc, + 'deprecated': doc.deprecated, + 'file': doc.source_file_name, + 'file_line': doc.source_file_def_line, + 'github_link': doc.github_url, + 'errors': errs, + 'warnings': wrns, + 'examples_errors': examples_errs} - if not errs: - sys.stderr.write('Docstring for "{}" correct. :)\n'.format(func_name)) - if examples_errs: - sys.stderr.write(_output_header('Doctests')) - sys.stderr.write(examples_errs) +def validate_all(): + """ + Execute the validation of all docstrings, and return a dict with the + results. - return len(errs) + Returns + ------- + dict + A dictionary with an item for every function/method... containing + all the validation information. + """ + result = {} + seen = {} + # functions from the API docs + api_items = list(get_api_items()) + for func_name, func_obj, section, subsection in api_items: + doc_info = validate_one(func_name) + result[func_name] = doc_info -def main(function): - if function is None: - return validate_all() + shared_code_key = doc_info['file'], doc_info['file_line'] + shared_code = seen.get(shared_code_key, '') + result[func_name].update({'in_api': True, + 'section': section, + 'subsection': subsection, + 'shared_code_with': shared_code}) + + seen[shared_code_key] = func_name + + # functions from introspecting Series, DataFrame and Panel + api_item_names = set(list(zip(*api_items))[0]) + for class_ in (pandas.Series, pandas.DataFrame, pandas.Panel): + for member in inspect.getmembers(class_): + func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) + if (not member[0].startswith('_') and + func_name not in api_item_names): + doc_info = validate_one(func_name) + result[func_name] = doc_info + result[func_name]['in_api'] = False + + return result + + +def main(func_name, fd): + def header(title, width=80, char='#'): + full_line = char * width + side_len = (width - len(title) - 2) // 2 + adj = '' if len(title) % 2 == 0 else ' ' + title_line = '{side} {title}{adj} {side}'.format(side=char * side_len, + title=title, + adj=adj) + + return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format( + full_line=full_line, title_line=title_line) + + if func_name is None: + json_doc = validate_all() + fd.write(json.dumps(json_doc)) else: - return validate_one(function) + doc_info = validate_one(func_name) + + fd.write(header('Docstring ({})'.format(func_name))) + fd.write('{}\n'.format(doc_info['docstring'])) + fd.write(header('Validation')) + if doc_info['errors']: + fd.write('Errors found:\n') + for err in doc_info['errors']: + fd.write('\t{}\n'.format(err)) + if doc_info['warnings']: + fd.write('Warnings found:\n') + for wrn in doc_info['warnings']: + fd.write('\t{}\n'.format(wrn)) + + if not doc_info['errors']: + fd.write('Docstring for "{}" correct. :)\n'.format(func_name)) + + if doc_info['examples_errors']: + fd.write(header('Doctests')) + fd.write(doc_info['examples_errors']) if __name__ == '__main__': + func_help = ('function or method to validate (e.g. pandas.DataFrame.head) ' + 'if not provided, all docstrings are validated and returned ' + 'as JSON') argparser = argparse.ArgumentParser( description='validate pandas docstrings') argparser.add_argument('function', nargs='?', default=None, - help=('function or method to validate ' - '(e.g. pandas.DataFrame.head) ' - 'if not provided, all docstrings ' - 'are validated')) + help=func_help) args = argparser.parse_args() - sys.exit(main(args.function)) + sys.exit(main(args.function, sys.stdout)) From 5d2815faa370c019f6cd135b678a660c674b0804 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 18 Aug 2018 14:06:58 +0100 Subject: [PATCH 2/7] Fixing errors with introspection --- scripts/validate_docstrings.py | 56 ++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 0230df835841e..cdb044e4ecc73 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -92,8 +92,8 @@ class Docstring(object): def __init__(self, name): self.name = name obj = self._load_obj(name) - obj = self._to_original_callable(obj) self.obj = obj + self.code_obj = self._to_original_callable(obj) self.raw_doc = obj.__doc__ or '' self.clean_doc = pydoc.getdoc(obj) self.doc = NumpyDocString(self.clean_doc) @@ -102,10 +102,28 @@ def __len__(self): return len(self.raw_doc) @staticmethod - def _load_obj(obj_name): - for maxsplit in range(1, obj_name.count('.') + 1): + def _load_obj(name): + """ + Import Python object from its name as string. + + Parameters + ---------- + name : str + Object name to import (e.g. pandas.Series.str.upper) + + Returns + ------- + object + Python object that can be a class, method, function... + + Examples + -------- + >>> Docstring._load_obj('pandas.Series') + + """ + for maxsplit in range(1, name.count('.') + 1): # TODO when py3 only replace by: module, *func_parts = ... - func_name_split = obj_name.rsplit('.', maxsplit) + func_name_split = name.rsplit('.', maxsplit) module = func_name_split[0] func_parts = func_name_split[1:] try: @@ -117,7 +135,7 @@ def _load_obj(obj_name): if 'module' not in locals(): raise ImportError('No module can be imported ' - 'from "{}"'.format(obj_name)) + 'from "{}"'.format(name)) for part in func_parts: obj = getattr(obj, part) @@ -125,6 +143,13 @@ def _load_obj(obj_name): @staticmethod def _to_original_callable(obj): + """ + Find the Python object that contains the source code ot the object. + + This is useful to find the place in the source code (file and line + number) where a docstring is defined. It does not currently work for + all cases, but it should help find some (properties...). + """ while True: if inspect.isfunction(obj) or inspect.isclass(obj): f = inspect.getfile(obj) @@ -152,16 +177,20 @@ def is_function_or_method(self): @property def source_file_name(self): - fname = inspect.getsourcefile(self.obj) - if fname: - fname = os.path.relpath(fname, BASE_PATH) - return fname + try: + fname = inspect.getsourcefile(self.code_obj) + except TypeError: + pass + else: + if fname: + fname = os.path.relpath(fname, BASE_PATH) + return fname @property def source_file_def_line(self): try: - return inspect.getsourcelines(self.obj)[-1] - except OSError: + return inspect.getsourcelines(self.code_obj)[-1] + except (OSError, TypeError): pass @property @@ -294,7 +323,10 @@ def yields(self): @property def method_source(self): - return inspect.getsource(self.obj) + try: + return inspect.getsource(self.obj) + except TypeError: + return '' @property def first_line_ends_in_dot(self): From 5bb7b9b73597b2788fbe752a5baf260c071c8aef Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 18 Aug 2018 14:22:26 +0100 Subject: [PATCH 3/7] Adapting tests to the new format --- .../tests/scripts/test_validate_docstrings.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pandas/tests/scripts/test_validate_docstrings.py b/pandas/tests/scripts/test_validate_docstrings.py index 1d35d5d30bba3..2deb062c6f032 100644 --- a/pandas/tests/scripts/test_validate_docstrings.py +++ b/pandas/tests/scripts/test_validate_docstrings.py @@ -490,25 +490,25 @@ def _import_path(self, klass=None, func=None): return base_path def test_good_class(self): - assert validate_one(self._import_path( # noqa: F821 - klass='GoodDocStrings')) == 0 + assert len(validate_one(self._import_path( # noqa: F821 + klass='GoodDocStrings'))['errors']) == 0 @pytest.mark.parametrize("func", [ 'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1', 'contains']) def test_good_functions(self, func): - assert validate_one(self._import_path( # noqa: F821 - klass='GoodDocStrings', func=func)) == 0 + assert len(validate_one(self._import_path( # noqa: F821 + klass='GoodDocStrings', func=func))['errors']) == 0 def test_bad_class(self): - assert validate_one(self._import_path( # noqa: F821 - klass='BadGenericDocStrings')) > 0 + assert len(validate_one(self._import_path( # noqa: F821 + klass='BadGenericDocStrings'))['errors']) > 0 @pytest.mark.parametrize("func", [ 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method']) def test_bad_generic_functions(self, func): - assert validate_one(self._import_path( # noqa:F821 - klass='BadGenericDocStrings', func=func)) > 0 + assert len(validate_one(self._import_path( # noqa:F821 + klass='BadGenericDocStrings', func=func))['errors']) > 0 @pytest.mark.parametrize("klass,func,msgs", [ # Summary tests @@ -546,7 +546,6 @@ def test_bad_generic_functions(self, func): marks=pytest.mark.xfail) ]) def test_bad_examples(self, capsys, klass, func, msgs): - validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 - err = capsys.readouterr().err + res = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 for msg in msgs: - assert msg in err + assert msg in ''.join(res['errors']) From 188f4a39870fb1565af8a17b731023e1c7ef3243 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 20 Aug 2018 13:51:20 +0100 Subject: [PATCH 4/7] Addressing comments from the code review --- .../tests/scripts/test_validate_docstrings.py | 28 ++++++++++------- scripts/validate_docstrings.py | 30 +++++++++++++++++++ 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/pandas/tests/scripts/test_validate_docstrings.py b/pandas/tests/scripts/test_validate_docstrings.py index 2deb062c6f032..b64b16aca8def 100644 --- a/pandas/tests/scripts/test_validate_docstrings.py +++ b/pandas/tests/scripts/test_validate_docstrings.py @@ -490,25 +490,33 @@ def _import_path(self, klass=None, func=None): return base_path def test_good_class(self): - assert len(validate_one(self._import_path( # noqa: F821 - klass='GoodDocStrings'))['errors']) == 0 + errors = validate_one(self._import_path( # noqa: F821 + klass='GoodDocStrings'))['errors'] + assert isinstance(errors, list) + assert not errors @pytest.mark.parametrize("func", [ 'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1', 'contains']) def test_good_functions(self, func): - assert len(validate_one(self._import_path( # noqa: F821 - klass='GoodDocStrings', func=func))['errors']) == 0 + errors = validate_one(self._import_path( # noqa: F821 + klass='GoodDocStrings', func=func))['errors'] + assert isinstance(errors, list) + assert not errors def test_bad_class(self): - assert len(validate_one(self._import_path( # noqa: F821 - klass='BadGenericDocStrings'))['errors']) > 0 + errors = validate_one(self._import_path( # noqa: F821 + klass='BadGenericDocStrings'))['errors'] + assert isinstance(errors, list) + assert errors @pytest.mark.parametrize("func", [ 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method']) def test_bad_generic_functions(self, func): - assert len(validate_one(self._import_path( # noqa:F821 - klass='BadGenericDocStrings', func=func))['errors']) > 0 + errors = validate_one(self._import_path( # noqa:F821 + klass='BadGenericDocStrings', func=func))['errors'] + assert isinstance(errors, list) + assert errors @pytest.mark.parametrize("klass,func,msgs", [ # Summary tests @@ -546,6 +554,6 @@ def test_bad_generic_functions(self, func): marks=pytest.mark.xfail) ]) def test_bad_examples(self, capsys, klass, func, msgs): - res = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 + result = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 for msg in msgs: - assert msg in ''.join(res['errors']) + assert msg in ' '.join(result['errors']) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index cdb044e4ecc73..b8a5523d21568 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -45,6 +45,24 @@ def get_api_items(): + """ + Parse api.rst file from the documentation, and extract all the functions, + methods, classes, attributes... This should include all pandas public API. + + Yields + ------ + name : str + The name of the object (e.g. 'pandas.Series.str.upper). + func : function + The object itself. In most cases this will be a function or method, + but it can also be classes, properties, cython objects... + section : str + The name of the section in the API page where the object item is + located. + subsection : str + The name of the subsection in the API page where the object item is + located. + """ api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') previous_line = current_section = current_subsection = '' @@ -177,9 +195,15 @@ def is_function_or_method(self): @property def source_file_name(self): + """ + File name where the object is implemented (e.g. pandas/core/frame.py). + """ try: fname = inspect.getsourcefile(self.code_obj) except TypeError: + # In some cases the object is something complex like a cython + # object that can't be easily introspected. An it's better to + # return the source code file of the object as None, than crash pass else: if fname: @@ -188,9 +212,15 @@ def source_file_name(self): @property def source_file_def_line(self): + """ + Number of line where the object is defined in its file. + """ try: return inspect.getsourcelines(self.code_obj)[-1] except (OSError, TypeError): + # In some cases the object is something complex like a cython + # object that can't be easily introspected. An it's better to + # return the line number as None, than crash pass @property From 3a5e7fd823462f8b899bd154adc03280aa4b57d2 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 28 Sep 2018 16:36:00 +0100 Subject: [PATCH 5/7] Fixing pep8 --- scripts/validate_docstrings.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index a065c72d74001..d999323944240 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -376,9 +376,9 @@ def first_line_ends_in_dot(self): @property def deprecated(self): pattern = re.compile('.. deprecated:: ') - return (self.name.startswith('pandas.Panel') or - bool(pattern.search(self.summary)) or - bool(pattern.search(self.extended_summary))) + return (self.name.startswith('pandas.Panel') + or bool(pattern.search(self.summary)) + or bool(pattern.search(self.extended_summary))) @property def mentioned_private_classes(self): @@ -440,8 +440,8 @@ def validate_one(func_name): errs.append('Summary does not end with a period') if doc.summary != doc.summary.lstrip(): errs.append('Summary contains heading whitespaces.') - elif (doc.is_function_or_method and - doc.summary.split(' ')[0][-1] == 's'): + elif (doc.is_function_or_method + and doc.summary.split(' ')[0][-1] == 's'): errs.append('Summary must start with infinitive verb, ' 'not third person (e.g. use "Generate" instead of ' '"Generates")') @@ -553,8 +553,8 @@ def validate_all(): for class_ in (pandas.Series, pandas.DataFrame, pandas.Panel): for member in inspect.getmembers(class_): func_name = 'pandas.{}.{}'.format(class_.__name__, member[0]) - if (not member[0].startswith('_') and - func_name not in api_item_names): + if (not member[0].startswith('_') + and func_name not in api_item_names): doc_info = validate_one(func_name) result[func_name] = doc_info result[func_name]['in_api'] = False From 1d2c5c0422e1e98d1635a3717a101a313e89b868 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 30 Sep 2018 00:33:47 +0100 Subject: [PATCH 6/7] Adding tests to the function that parses api.rst --- scripts/tests/test_validate_docstrings.py | 77 +++++++++++++++++++++++ scripts/validate_docstrings.py | 75 +++++++++++----------- 2 files changed, 117 insertions(+), 35 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index ba2d72b484702..058781ff80312 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -1,5 +1,6 @@ import string import random +import io import pytest import numpy as np @@ -605,3 +606,79 @@ def test_bad_examples(self, capsys, klass, func, msgs): result = validate_one(self._import_path(klass=klass, func=func)) # noqa:F821 for msg in msgs: assert msg in ' '.join(result['errors']) + + +class TestApiItems(object): + @property + def api_doc(self): + return io.StringIO(''' +.. currentmodule:: itertools + +Itertools +--------- + +Infinite +~~~~~~~~ + +.. autosummary:: + + cycle + count + +Finite +~~~~~~ + +.. autosummary:: + + chain + +.. currentmodule:: random + +Random +------ + +All +~~~ + +.. autosummary:: + + seed + randint +''') + + @pytest.mark.parametrize('idx,name', [(0, 'itertools.cycle'), + (1, 'itertools.count'), + (2, 'itertools.chain'), + (3, 'random.seed'), + (4, 'random.randint')]) + def test_item_name(self, idx, name): + res = list(validate_docstrings.get_api_items(self.api_doc)) + assert res[idx][0] == name + + @pytest.mark.parametrize('idx,func', [(0, 'cycle'), + (1, 'count'), + (2, 'chain'), + (3, 'seed'), + (4, 'randint')]) + def test_item_function(self, idx, func): + res = list(validate_docstrings.get_api_items(self.api_doc)) + assert callable(res[idx][1]) + assert res[idx][1].__name__ == func + + @pytest.mark.parametrize('idx,section', [(0, 'Itertools'), + (1, 'Itertools'), + (2, 'Itertools'), + (3, 'Random'), + (4, 'Random')]) + def test_item_section(self, idx, section): + res = list(validate_docstrings.get_api_items(self.api_doc)) + assert res[idx][2] == section + + @pytest.mark.parametrize('idx,subsection', [(0, 'Infinite'), + (1, 'Infinite'), + (2, 'Finite'), + (3, 'All'), + (4, 'All')]) + def test_item_subsection(self, idx, subsection): + res = list(validate_docstrings.get_api_items(self.api_doc)) + assert res[idx][3] == subsection diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index d999323944240..4407feb1fc818 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -45,11 +45,17 @@ DIRECTIVES = ['versionadded', 'versionchanged', 'deprecated'] -def get_api_items(): +def get_api_items(api_doc_fd): """ Parse api.rst file from the documentation, and extract all the functions, methods, classes, attributes... This should include all pandas public API. + Parameters + ---------- + api_doc_fd : file descriptor + A file descriptor of the API documentation page, containing the table + of contents with all the public API. + Yields ------ name : str @@ -64,47 +70,44 @@ def get_api_items(): The name of the subsection in the API page where the object item is located. """ - api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') - previous_line = current_section = current_subsection = '' position = None - with open(api_fname) as f: - for line in f: - line = line.strip() - if len(line) == len(previous_line): - if set(line) == set('-'): - current_section = previous_line - continue - if set(line) == set('~'): - current_subsection = previous_line - continue - - if line.startswith('.. currentmodule::'): - current_module = line.replace('.. currentmodule::', '').strip() + for line in api_doc_fd: + line = line.strip() + if len(line) == len(previous_line): + if set(line) == set('-'): + current_section = previous_line continue - - if line == '.. autosummary::': - position = 'autosummary' + if set(line) == set('~'): + current_subsection = previous_line continue - if position == 'autosummary': - if line == '': - position = 'items' - continue + if line.startswith('.. currentmodule::'): + current_module = line.replace('.. currentmodule::', '').strip() + continue - if position == 'items': - if line == '': - position = None - continue - item = line.strip() - func = importlib.import_module(current_module) - for part in item.split('.'): - func = getattr(func, part) + if line == '.. autosummary::': + position = 'autosummary' + continue + + if position == 'autosummary': + if line == '': + position = 'items' + continue + + if position == 'items': + if line == '': + position = None + continue + item = line.strip() + func = importlib.import_module(current_module) + for part in item.split('.'): + func = getattr(func, part) - yield ('.'.join([current_module, item]), func, - current_section, current_subsection) + yield ('.'.join([current_module, item]), func, + current_section, current_subsection) - previous_line = line + previous_line = line class Docstring(object): @@ -534,7 +537,9 @@ def validate_all(): seen = {} # functions from the API docs - api_items = list(get_api_items()) + api_doc_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst') + with open(api_doc_fname) as f: + api_items = list(get_api_items(f)) for func_name, func_obj, section, subsection in api_items: doc_info = validate_one(func_name) result[func_name] = doc_info From db782fded5b52c5a23b3b3449c39ca8fbf58716a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 8 Oct 2018 09:30:38 +0530 Subject: [PATCH 7/7] Addressing comments from review --- scripts/tests/test_validate_docstrings.py | 20 ++++++++++---------- scripts/validate_docstrings.py | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 058781ff80312..27c63e3ba3a79 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -608,7 +608,7 @@ def test_bad_examples(self, capsys, klass, func, msgs): assert msg in ' '.join(result['errors']) -class TestApiItems(object): +class ApiItems(object): @property def api_doc(self): return io.StringIO(''' @@ -652,8 +652,8 @@ def api_doc(self): (3, 'random.seed'), (4, 'random.randint')]) def test_item_name(self, idx, name): - res = list(validate_docstrings.get_api_items(self.api_doc)) - assert res[idx][0] == name + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert result[idx][0] == name @pytest.mark.parametrize('idx,func', [(0, 'cycle'), (1, 'count'), @@ -661,9 +661,9 @@ def test_item_name(self, idx, name): (3, 'seed'), (4, 'randint')]) def test_item_function(self, idx, func): - res = list(validate_docstrings.get_api_items(self.api_doc)) - assert callable(res[idx][1]) - assert res[idx][1].__name__ == func + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert callable(result[idx][1]) + assert result[idx][1].__name__ == func @pytest.mark.parametrize('idx,section', [(0, 'Itertools'), (1, 'Itertools'), @@ -671,8 +671,8 @@ def test_item_function(self, idx, func): (3, 'Random'), (4, 'Random')]) def test_item_section(self, idx, section): - res = list(validate_docstrings.get_api_items(self.api_doc)) - assert res[idx][2] == section + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert result[idx][2] == section @pytest.mark.parametrize('idx,subsection', [(0, 'Infinite'), (1, 'Infinite'), @@ -680,5 +680,5 @@ def test_item_section(self, idx, section): (3, 'All'), (4, 'All')]) def test_item_subsection(self, idx, subsection): - res = list(validate_docstrings.get_api_items(self.api_doc)) - assert res[idx][3] == subsection + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert result[idx][3] == subsection diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 4407feb1fc818..6588522331433 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -47,6 +47,8 @@ def get_api_items(api_doc_fd): """ + Yield information about all public API items. + Parse api.rst file from the documentation, and extract all the functions, methods, classes, attributes... This should include all pandas public API.