MS-575 Add Clang-format & Clang-tidy & Cpplint

Former-commit-id: e62b3b42b0b27c3bbbb6d62cfff4a518913298d7
This commit is contained in:
test 2019-09-20 06:57:23 +00:00
parent 7a18293865
commit 0ff59df1a4
6 changed files with 277 additions and 57 deletions

1
cpp/.gitignore vendored
View File

@ -8,3 +8,4 @@ output.info
output_new.info
server.info
thirdparty/knowhere/
*.pyc

View File

@ -90,6 +90,8 @@ else()
set(MILVUS_BUILD_ARCH unknown)
endif()
find_package (Python COMPONENTS Interpreter Development)
find_package(CUDA)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES --expt-extended-lambda")
@ -157,8 +159,47 @@ install(FILES
DESTINATION
conf)
add_custom_target(format ${BUILD_SUPPORT_DIR}/run_clang_format.py
${CLANG_FORMAT_BIN}
${BUILD_SUPPORT_DIR}/clang_format_exclusions.txt
${CMAKE_CURRENT_SOURCE_DIR}/src)
#
# "make lint" target
#
if(NOT MILVUS_VERBOSE_LINT)
set(MILVUS_LINT_QUIET "--quiet")
endif()
if(NOT LINT_EXCLUSIONS_FILE)
# source files matching a glob from a line in this file
# will be excluded from linting (cpplint, clang-tidy, clang-format)
set(LINT_EXCLUSIONS_FILE ${BUILD_SUPPORT_DIR}/lint_exclusions.txt)
endif()
#
# "make format" and "make check-format" targets
#
if(${CLANG_FORMAT_FOUND})
# runs clang format and updates files in place.
add_custom_target(format
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_clang_format.py
--clang_format_binary
${CLANG_FORMAT_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}/src
--fix
${MILVUS_LINT_QUIET})
# runs clang format and exits with a non-zero exit code if any files need to be reformatted
add_custom_target(check-format
${PYTHON_EXECUTABLE}
${BUILD_SUPPORT_DIR}/run_clang_format.py
--clang_format_binary
${CLANG_FORMAT_BIN}
--exclude_globs
${LINT_EXCLUSIONS_FILE}
--source_dir
${CMAKE_CURRENT_SOURCE_DIR}/src
${MILVUS_LINT_QUIET})
endif()

110
cpp/build-support/lintutils.py Executable file
View File

@ -0,0 +1,110 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import multiprocessing as mp
import os
from fnmatch import fnmatch
from subprocess import Popen
def chunk(seq, n):
"""
divide a sequence into equal sized chunks
(the last chunk may be smaller, but won't be empty)
"""
chunks = []
some = []
for element in seq:
if len(some) == n:
chunks.append(some)
some = []
some.append(element)
if len(some) > 0:
chunks.append(some)
return chunks
def dechunk(chunks):
"flatten chunks into a single list"
seq = []
for chunk in chunks:
seq.extend(chunk)
return seq
def run_parallel(cmds, **kwargs):
"""
Run each of cmds (with shared **kwargs) using subprocess.Popen
then wait for all of them to complete.
Runs batches of multiprocessing.cpu_count() * 2 from cmds
returns a list of tuples containing each process'
returncode, stdout, stderr
"""
complete = []
for cmds_batch in chunk(cmds, mp.cpu_count() * 2):
procs_batch = [Popen(cmd, **kwargs) for cmd in cmds_batch]
for proc in procs_batch:
stdout, stderr = proc.communicate()
complete.append((proc.returncode, stdout, stderr))
return complete
_source_extensions = '''
.h
.cc
.cpp
'''.split()
def get_sources(source_dir, exclude_globs=[]):
sources = []
for directory, subdirs, basenames in os.walk(source_dir):
for path in [os.path.join(directory, basename)
for basename in basenames]:
# filter out non-source files
if os.path.splitext(path)[1] not in _source_extensions:
continue
path = os.path.abspath(path)
# filter out files that match the globs in the globs file
if any([fnmatch(path, glob) for glob in exclude_globs]):
continue
sources.append(path)
return sources
def stdout_pathcolonline(completed_process, filenames):
"""
given a completed process which may have reported some files as problematic
by printing the path name followed by ':' then a line number, examine
stdout and return the set of actually reported file names
"""
returncode, stdout, stderr = completed_process
bfilenames = set()
for filename in filenames:
bfilenames.add(filename.encode('utf-8') + b':')
problem_files = set()
for line in stdout.splitlines():
for filename in bfilenames:
if line.startswith(filename):
problem_files.add(filename.decode('utf-8'))
bfilenames.remove(filename)
break
return problem_files, stdout

View File

@ -16,64 +16,127 @@
# specific language governing permissions and limitations
# under the License.
import fnmatch
import os
import subprocess
from __future__ import print_function
import lintutils
from subprocess import PIPE
import argparse
import difflib
import multiprocessing as mp
import sys
if len(sys.argv) < 4:
sys.stderr.write("Usage: %s $CLANG_FORMAT_VERSION exclude_globs.txt "
"$source_dir\n" %
sys.argv[0])
sys.exit(1)
CLANG_FORMAT = sys.argv[1]
EXCLUDE_GLOBS_FILENAME = sys.argv[2]
SOURCE_DIR = sys.argv[3]
if len(sys.argv) > 4:
CHECK_FORMAT = int(sys.argv[4]) == 1
else:
CHECK_FORMAT = False
from functools import partial
exclude_globs = [line.strip() for line in open(EXCLUDE_GLOBS_FILENAME, "r")]
# examine the output of clang-format and if changes are
# present assemble a (unified)patch of the difference
def _check_one_file(completed_processes, filename):
with open(filename, "rb") as reader:
original = reader.read()
files_to_format = []
matches = []
for directory, subdirs, files in os.walk(SOURCE_DIR):
for name in files:
name = os.path.join(directory, name)
if not (name.endswith('.h') or name.endswith('.cpp') or name.endswith('.cuh') or name.endswith('.cu')):
continue
returncode, stdout, stderr = completed_processes[filename]
formatted = stdout
if formatted != original:
# Run the equivalent of diff -u
diff = list(difflib.unified_diff(
original.decode('utf8').splitlines(True),
formatted.decode('utf8').splitlines(True),
fromfile=filename,
tofile="{} (after clang format)".format(
filename)))
else:
diff = None
excluded = False
for g in exclude_globs:
if fnmatch.fnmatch(name, g):
excluded = True
break
if not excluded:
files_to_format.append(name)
return filename, diff
if CHECK_FORMAT:
output = subprocess.check_output([CLANG_FORMAT, '-output-replacements-xml']
+ files_to_format,
stderr=subprocess.STDOUT).decode('utf8')
to_fix = []
for line in output.split('\n'):
if 'offset' in line:
to_fix.append(line)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Runs clang-format on all of the source "
"files. If --fix is specified enforce format by "
"modifying in place, otherwise compare the output "
"with the existing file and output any necessary "
"changes as a patch in unified diff format")
parser.add_argument("--clang_format_binary",
required=True,
help="Path to the clang-format binary")
parser.add_argument("--exclude_globs",
help="Filename containing globs for files "
"that should be excluded from the checks")
parser.add_argument("--source_dir",
required=True,
help="Root directory of the source code")
parser.add_argument("--fix", default=False,
action="store_true",
help="If specified, will re-format the source "
"code instead of comparing the re-formatted "
"output, defaults to %(default)s")
parser.add_argument("--quiet", default=False,
action="store_true",
help="If specified, only print errors")
arguments = parser.parse_args()
if len(to_fix) > 0:
print("clang-format checks failed, run 'make format' to fix")
sys.exit(-1)
else:
try:
cmd = [CLANG_FORMAT, '-i'] + files_to_format
subprocess.check_output(cmd, stderr=subprocess.STDOUT)
except Exception as e:
print(e)
print(' '.join(cmd))
raise
exclude_globs = []
if arguments.exclude_globs:
for line in open(arguments.exclude_globs):
exclude_globs.append(line.strip())
formatted_filenames = []
for path in lintutils.get_sources(arguments.source_dir, exclude_globs):
formatted_filenames.append(str(path))
if arguments.fix:
if not arguments.quiet:
print("\n".join(map(lambda x: "Formatting {}".format(x),
formatted_filenames)))
# Break clang-format invocations into chunks: each invocation formats
# 16 files. Wait for all processes to complete
results = lintutils.run_parallel([
[arguments.clang_format_binary, "-i"] + some
for some in lintutils.chunk(formatted_filenames, 16)
])
for returncode, stdout, stderr in results:
# if any clang-format reported a parse error, bubble it
if returncode != 0:
sys.exit(returncode)
else:
# run an instance of clang-format for each source file in parallel,
# then wait for all processes to complete
results = lintutils.run_parallel([
[arguments.clang_format_binary, filename]
for filename in formatted_filenames
], stdout=PIPE, stderr=PIPE)
for returncode, stdout, stderr in results:
# if any clang-format reported a parse error, bubble it
if returncode != 0:
sys.exit(returncode)
error = False
checker = partial(_check_one_file, {
filename: result
for filename, result in zip(formatted_filenames, results)
})
pool = mp.Pool()
try:
# check the output from each invocation of clang-format in parallel
for filename, diff in pool.imap(checker, formatted_filenames):
if not arguments.quiet:
print("Checking {}".format(filename))
if diff:
print("{} had clang-format style issues".format(filename))
# Print out the diff to stderr
error = True
# pad with a newline
print(file=sys.stderr)
diff_out = []
for diff_str in diff:
diff_out.append(diff_str.encode('raw_unicode_escape'))
sys.stderr.writelines(diff_out)
except Exception:
error = True
raise
finally:
pool.terminate()
pool.join()
sys.exit(1 if error else 0)

View File

@ -32,6 +32,7 @@
find_program(CLANG_TIDY_BIN
NAMES
clang-tidy-7.0
clang-tidy-6.0
clang-tidy-5.0
clang-tidy-4.0
@ -83,7 +84,11 @@ if (CLANG_FORMAT_VERSION)
endif()
else()
find_program(CLANG_FORMAT_BIN
NAMES clang-format-4.0
NAMES
clang-format-7.0
clang-format-6.0
clang-format-5.0
clang-format-4.0
clang-format-3.9
clang-format-3.8
clang-format-3.7