Gitlab
目前工作中大多用的是gitlab,比较少的情况下会用到svn。
GitLab 提供了基于 Git 的版本控制系统,允许团队成员有效地管理和追踪代码的历史版本。每个更改都可以追溯,方便回滚或合并不同的开发分支。
偶尔会遇到需要统计代码行数的情况。说实话,统计提交的代码行数确实比较奇怪,但现实中有些时候确实还是需要的。
svn的代码统计,我之前有搜索到相关方案,需要下载一个工具,我个人感觉如果项目比较多的情况,还是比较麻烦的。
gitlab本身提供了open api(svn有没有我不太了解),做代码统计要方便得多。
GitLab Open API 文档是 GitLab 官方提供的接口参考手册,开发者可以利用这些 API 与 GitLab 服务器进行交互,自动化执行各种任务,比如创建项目、管理用户权限、读取或修改代码仓库内容、处理合并请求、触发 CI/CD 流水线、获取仓库统计信息等。
这是gitlab open api的文档地址: https://docs.gitlab.com/ee/api/rest/
下面提供一些自己用的方案,实现场景因需调整,不一定直接复刻。
shell命令
下面这些命令在仓库的根目录下执行:
shell
# Sum the added/deleted line counts of every commit by the given author.
# --numstat prints "<added> <deleted> <path>" for each changed file; awk totals the first two columns.
git log --author="你的名字" --pretty=tformat: --numstat \
  | awk '
      { added += $1; removed += $2; changed += $1 + $2 }
      END { printf "增加行数: %s, 删除行数: %s, 总行数: %s\n", added, removed, changed }
    '
示例:
当然,也可以指定日期,名字则从git配置中获取(如下统计从2024年1月1日到2025年1月1日提交的代码):
shell
# Same totals as above, restricted to a date range; the author name is read from the local git config.
# Plain `awk` is used instead of `gawk`: the program only relies on POSIX awk features, so it also
# runs on systems where GNU awk is not installed under the name `gawk`.
git log --author="$(git config --get user.name)" --since='2024-01-01' --until='2025-01-01' --pretty=tformat: --numstat \
  | awk '{ add += $1 ; subs += $2 ; loc += $1 + $2 } END { printf "增加行数:%s 删除行数:%s 总行数: %s\n",add,subs,loc }'
这个方案可以灵活调整,比如for循环遍历所有仓库等。
如果是在windows下(比如我的示例),需要打开git bash命令行。git bash在本地安装git客户端时会一并安装,在仓库目录下按住shift+鼠标右键即可打开。
Python 脚本
下面提供一个我用的python脚本,依赖第三方库requests(需要先执行 pip install requests)。要求python环境大于等于3.10,因为我是用3.10写的,其它版本是否支持相关语法,我不确定;代码中用到了f-string,所以3.6以下肯定不支持。
定义git_statistics.py, 下面是代码
python
# git_statistics.py
import datetime
from collections import defaultdict
import requests
"""git仓库地址"""
# Base URL of the GitLab instance that is queried.
root_url = "https://gitlab.***.com"

# Personal access token created in GitLab; replace it with your own and
# avoid committing a real token to version control.
token = "glpat-33dddsssff-22-Jm-B"

# First day of the statistics window (YYYY-MM-DD).
start_day = "2024-01-01"

# Last day of the statistics window (YYYY-MM-DD).
end_day = "2025-01-01"

# The window start as a datetime object.
start_date = datetime.datetime.strptime(start_day, "%Y-%m-%d")

# The window end as a datetime object.
end_date = datetime.datetime.strptime(end_day, "%Y-%m-%d")

# URL that lists projects; a "&page=N" suffix is appended by the caller.
# NOTE(review): GitLab documents a maximum page size of 100, so the
# per_page=1000 requested here is presumably capped by the server - confirm.
query_repository_list_url = f"{root_url}/api/v4/projects?private_token={token}&per_page=1000"

# Repositories to filter by their namespace full_path.
exclude_paths = ()

# Repository prefixes to exclude.
exclude_prefix = ()

# Project names to exclude.
exclude_project = ("仓库1", "仓库2", "仓库3")

# strptime format used to parse timestamps returned by the API.
datetime_format = "%Y-%m-%dT%H:%M:%S.%fZ"
def get_all_commits(repository):
    """Return the default-branch commits of a repository, grouped by committer.

    Only commits between the module-level ``start_date`` and ``end_date`` are
    requested. The commit list is fetched page by page: GitLab limits how many
    records one response may contain, so a single request with a very large
    ``per_page`` silently drops everything beyond the first page.

    Args:
        repository: object exposing ``id``, ``name`` and ``default_branch``.

    Returns:
        A ``defaultdict(list)`` mapping committer e-mail to a list of
        ``Commit`` objects, or ``None`` when the repository has no default
        branch or no commit in the window.

    Raises:
        requests.HTTPError: when GitLab answers with an error status. Without
            this check the error payload (a dict) would be iterated as if it
            were a list of commits and fail with an unrelated TypeError.
    """
    # A repository without a default branch has nothing to count on it.
    if not repository.default_branch:
        return None

    url = f"{root_url}/api/v4/projects/{repository.id}/repository/commits"
    per_page = 100  # maximum page size documented by GitLab
    query = {
        "ref_name": repository.default_branch,
        "since": start_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        "until": end_date.strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        "private_token": token,
        "per_page": per_page,
    }

    # Group the commits by committer e-mail.
    user_dict = defaultdict(list)
    page = 1
    while True:
        # `params=` lets requests URL-encode the values (branch names may
        # contain characters that are not safe inside a query string).
        response = requests.get(url, params={**query, "page": page})
        response.raise_for_status()
        batch = response.json()
        for commit_record in batch:
            commit = Commit()
            commit.id = commit_record['id']
            commit.repository_name = repository.name
            commit.committer_name = commit_record['committer_name']
            commit.committer_email = commit_record['committer_email']
            user_dict[commit.committer_email].append(commit)
        # A short (or empty) page means there is nothing left to fetch.
        if len(batch) < per_page:
            break
        page += 1

    return user_dict if user_dict else None
def get_commit_stats(repository_id, commit_id):
    """Fetch a single commit and return its line counters.

    Args:
        repository_id: id of the project the commit belongs to.
        commit_id: id of the commit to look up.

    Returns:
        A ``CommitStats`` whose ``total``, ``deletions`` and ``additions`` are
        copied from the ``stats`` object of the API response.
    """
    endpoint = f"{root_url}/api/v4/projects/{repository_id}/repository/commits/{commit_id}?private_token={token}"
    payload = requests.get(endpoint).json()

    result = CommitStats()
    # Copy the three counters in the same order the response is read.
    for field in ('total', 'deletions', 'additions'):
        setattr(result, field, payload['stats'][field])
    return result
def start():
    """Run the statistics job and write the two CSV reports.

    Steps: list the repositories that were active since ``start_date`` and are
    not excluded, sum the per-commit line counters for every committer of
    every repository, then write ``user-output.csv`` (per user) and
    ``repository-output.csv`` (per repository) to the working directory.
    """
    repositories = _list_repositories()
    print(f"本轮需要统计的仓库数量: {len(repositories)}")
    for r in repositories:
        print(r.name)

    user_commit_statistics_list = _collect_user_statistics(repositories)
    print("统计执行完成")

    _write_user_report(user_commit_statistics_list)
    _write_repository_report(user_commit_statistics_list)


def _is_excluded(repository):
    """Return True when the repository matches a configured exclusion rule."""
    # The previous conditions were written as `not exclude_x and ...`, which
    # can never be true, so no repository was ever excluded.
    if repository.full_path in exclude_paths:
        return True
    # str.startswith accepts a tuple; an empty tuple never matches.
    if repository.name.startswith(tuple(exclude_prefix)):
        return True
    return repository.name in exclude_project


def _list_repositories():
    """Page through the project list and keep the repositories to analyse."""
    repositories = []
    for page in range(1, 100):
        # The number of records per page is limited, so query page by page.
        response = requests.get(f"{query_repository_list_url}&page={page}")
        # Fail with the real HTTP error (e.g. invalid token) instead of
        # iterating an error payload as if it were a project list.
        response.raise_for_status()
        projects = response.json()
        if not projects:
            break
        for project in projects:
            last_active_time = datetime.datetime.strptime(project['last_activity_at'], datetime_format)
            # No activity since the window started: nothing to count.
            if last_active_time < start_date:
                continue
            repository = Repository()
            repository.id = project['id']
            repository.name = project['name']
            repository.path = project['path_with_namespace']
            repository.web_url = project['web_url']
            repository.full_path = project['namespace']['full_path']
            repository.default_branch = project['default_branch']
            if _is_excluded(repository):
                continue
            repositories.append(repository)
    return repositories


def _collect_user_statistics(repositories):
    """Build one CommitRepositoryUser per (repository, committer e-mail)."""
    statistics = []
    for repository in repositories:
        # All commits of the current repository, grouped by committer.
        user_commits_dict = get_all_commits(repository)
        if user_commits_dict is None:
            continue
        for email, commits in user_commits_dict.items():
            user = CommitRepositoryUser()
            user.email = email
            user.repository_name = repository.name
            seen_commit_ids = set()
            for commit in commits:
                # Count every commit id only once.
                if commit.id in seen_commit_ids:
                    continue
                seen_commit_ids.add(commit.id)
                user.username = commit.committer_name
                user.commit_total += 1
                stats = get_commit_stats(repository.id, commit.id)
                user.total += stats.total
                user.additions += stats.additions
                user.deletions += stats.deletions
            print(
                f"{repository.name}: {user.username} commits: {user.commit_total}, total: {user.total}, 增加代码: {user.additions}, 删除代码: {user.deletions} ")
            statistics.append(user)
    return statistics


def _write_lines(path, lines):
    """Write the lines to ``path`` as UTF-8 with BOM, terminated by CRLF."""
    # NOTE: fields are joined with ", " by the callers and are not quoted, so
    # a value that itself contains a comma will shift the columns.
    with open(path, mode='w', newline='', encoding='utf-8-sig') as csvfile:
        csvfile.writelines(line + "\r\n" for line in lines)


def _write_user_report(user_commit_statistics_list):
    """Write user-output.csv: per-repository rows, a total row, a blank row."""
    user_statistics_dict = defaultdict(list)
    for ucs in user_commit_statistics_list:
        user_statistics_dict[ucs.email].append(ucs)

    out_lines = ["姓名, 邮箱, 项目, 提交数, 总提交行数, 增加代码行数, 删除代码行数"]
    for usl in user_statistics_dict.values():
        cru = CommitRepositoryUser()
        for us in usl:
            cru.email = us.email
            cru.username = us.username
            cru.total += us.total
            cru.additions += us.additions
            cru.deletions += us.deletions
            cru.commit_total += us.commit_total
            out_lines.append(
                f"{us.username}, {us.email}, {us.repository_name}, {us.commit_total}, {us.total}, {us.additions}, {us.deletions}")
        # Totals of this user across all repositories, then a separator row.
        out_lines.append(
            f"{cru.username}, {cru.email}, , {cru.commit_total}, {cru.total}, {cru.additions}, {cru.deletions}")
        out_lines.append(", , , , , , ")
    _write_lines('user-output.csv', out_lines)


def _write_repository_report(user_commit_statistics_list):
    """Write repository-output.csv with one total row per repository."""
    repository_statistics_dict = defaultdict(list)
    for ucs in user_commit_statistics_list:
        repository_statistics_dict[ucs.repository_name].append(ucs)

    repository_out_lines = ["项目, 提交次数, 提交代码行数, 新增代码行数, 删除代码行数"]
    for repository_name, usl in repository_statistics_dict.items():
        cru = CommitRepositoryUser()
        cru.repository_name = repository_name
        for us in usl:
            cru.total += us.total
            cru.additions += us.additions
            cru.deletions += us.deletions
            cru.commit_total += us.commit_total
        repository_out_lines.append(
            f"{cru.repository_name}, {cru.commit_total}, {cru.total}, {cru.additions}, {cru.deletions}")
    _write_lines('repository-output.csv', repository_out_lines)
class Repository:
    """Repository record holding only the fields this script cares about.

    Every attribute defaults to ``None`` at class level and is meant to be
    overwritten on the instance.
    """

    # Project id.
    id = None
    # Project name.
    name = None
    # Default branch name.
    default_branch = None
    # Project path.
    path = None
    # Namespace full path.
    full_path = None
    # Web URL of the project.
    web_url = None
class Commit:
    """A single commit record.

    Every attribute defaults to ``None`` at class level and is meant to be
    overwritten on the instance.
    """

    # Commit id.
    id = None
    # Name of the repository the commit belongs to.
    repository_name = None
    # Committer name.
    committer_name = None
    # Committer e-mail.
    committer_email = None
class CommitStats:
    """Line counters of one commit; every counter starts at zero."""

    # Total changed lines.
    total: int = 0
    # Added lines.
    additions: int = 0
    # Deleted lines.
    deletions: int = 0
class CommitUser:
    """Accumulated commit figures for one user.

    The identity fields default to ``None`` and the counters to zero; they
    are class-level defaults that instances overwrite or increment.
    """

    # Identity of the user.
    username = None
    email = None

    # Number of commits counted for the user.
    commit_total: int = 0
    # Line counters summed over those commits.
    total: int = 0
    additions: int = 0
    deletions: int = 0
class CommitRepositoryUser(CommitUser):
    """User commit figures that additionally carry a repository name."""

    # Repository name; None until assigned on the instance.
    repository_name = None
在main.py进行调用:
python
from git_statistics import start
if __name__ == "__main__":
    # Run the statistics only when this file is executed as a script.
    start()
实现很简单,遍历账户下所有仓库(排除指定的仓库),获取每个仓库在指定时间段内的提交明细记录,对提交的代码行数进行汇总计算,最后输出两个csv文件:user-output.csv(按用户统计)和repository-output.csv(按仓库统计)。
只统计默认分支的,如果需要计算所有分支,请自己调整。
需要获取gitlab的一个token,可以上管理台自己生成,然后替换代码中的token。