gitlab
python
import requests
import pandas as pd
# GitLab API settings.
# Base URL of the GitLab REST API (v4) that every request below is built on.
GITLAB_API_URL = "https://gitlab.com/api/v4"
# NOTE(review): the two values below look like placeholders - replace them
# with a real project identifier and access token before running.
PROJECT_ID = "your_project_id"
ACCESS_TOKEN = "your_access_token"
def get_gitlab_commits(project_id, token, per_page=100):
    """Fetch every commit record of a GitLab project, page by page.

    Args:
        project_id: Project identifier interpolated into the API path.
        token: Access token, sent in the ``PRIVATE-TOKEN`` request header.
        per_page: Number of commits requested per page (default 100).

    Returns:
        A list holding the decoded JSON object of every commit, in the
        order the API returned them.

    Raises:
        Exception: If any page request answers with a non-200 status.
        requests.exceptions.RequestException: On network errors/timeouts.
    """
    url = f"{GITLAB_API_URL}/projects/{project_id}/repository/commits"
    # Authenticate through a header instead of a ``private_token`` query
    # parameter so the secret never becomes part of the request URL.
    headers = {"PRIVATE-TOKEN": token}
    commits = []
    page = 1
    while True:
        response = requests.get(
            url,
            headers=headers,
            params={"per_page": per_page, "page": page},
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        if response.status_code != 200:
            raise Exception(f"Failed to fetch commits: {response.text}")
        data = response.json()
        if not data:
            # An empty page means the history has been fully consumed.
            break
        commits.extend(data)
        page += 1
    return commits
def get_changed_files_in_commit(project_id, token, commit_sha):
    """Return the .c / .h paths touched by one GitLab commit.

    The diff listing is requested page by page: the loop keeps following
    the ``X-Next-Page`` response header until the server stops announcing
    a further page, so commits that touch more files than fit on a single
    page are reported completely.

    Args:
        project_id: Project identifier interpolated into the API path.
        token: Access token, sent in the ``PRIVATE-TOKEN`` request header.
        commit_sha: Identifier of the commit whose diff is inspected.

    Returns:
        A list of ``new_path`` values (one per diff entry) ending in
        ``.c`` or ``.h``.

    Raises:
        Exception: If any page request answers with a non-200 status.
        requests.exceptions.RequestException: On network errors/timeouts.
    """
    url = f"{GITLAB_API_URL}/projects/{project_id}/repository/commits/{commit_sha}/diff"
    # Header authentication keeps the token out of the request URL.
    headers = {"PRIVATE-TOKEN": token}
    changed_files = []
    page = "1"
    while page:
        response = requests.get(
            url,
            headers=headers,
            params={"per_page": 100, "page": page},
            # Without a timeout a stalled connection would block forever.
            timeout=30,
        )
        if response.status_code != 200:
            raise Exception(f"Failed to fetch commit diff: {response.text}")
        changed_files.extend(
            entry['new_path']
            for entry in response.json()
            if entry['new_path'].endswith(('.c', '.h'))
        )
        # Missing or empty header -> no further page; this also terminates
        # the loop safely if the server does not paginate the response.
        page = response.headers.get("X-Next-Page")
    return changed_files
def count_file_changes(project_id, token):
    """Count, per .c / .h file, how many commits list it in their diff.

    Every commit returned by ``get_gitlab_commits`` is inspected with
    ``get_changed_files_in_commit``; each path reported for a commit adds
    one to that path's counter.

    Args:
        project_id: Project identifier forwarded to the API helpers.
        token: Access token forwarded to the API helpers.

    Returns:
        A dict mapping file path to its change count. Only files that
        appear in at least one commit diff get an entry, so every count
        is >= 1.
    """
    file_changes = {}
    for commit in get_gitlab_commits(project_id, token):
        for file_path in get_changed_files_in_commit(project_id, token, commit['id']):
            # Each occurrence in a commit diff counts as one change.
            file_changes[file_path] = file_changes.get(file_path, 0) + 1
    return file_changes
def save_to_excel(file_changes, output_file='file_changes.xlsx'):
    """Write the per-file change counts to an Excel workbook.

    Args:
        file_changes: Mapping of file path to change count.
        output_file: Destination path of the workbook.
    """
    # One (path, count) row per file, under fixed column headers.
    rows = list(file_changes.items())
    table = pd.DataFrame(rows, columns=['File', 'Change Count'])
    # Write without the index column, using the openpyxl engine.
    table.to_excel(output_file, index=False, engine='openpyxl')
    print(f"File changes have been saved to {output_file}")
def main():
    """Collect the GitLab change statistics and export them to Excel."""
    # Gather the counts for the configured project, then write the workbook.
    save_to_excel(count_file_changes(PROJECT_ID, ACCESS_TOKEN))


if __name__ == '__main__':
    main()
svn
python
import subprocess
import xml.etree.ElementTree as ET
import pandas as pd
def run_svn_command(cmd, repo_url=None, working_copy=None):
    """Run a command and return its standard output.

    The target (working copy path or repository URL) is appended as the
    last argument, after the executable and its options. Placing it at
    the front would make the target the program to execute.

    Args:
        cmd: Argument list, executable first. It is not modified.
        repo_url: Repository URL used as target when no working copy
            is given.
        working_copy: Working copy path used as target; takes precedence
            over ``repo_url``.

    Returns:
        The decoded standard output of the process.

    Raises:
        Exception: If the process exits with a non-zero return code; the
            message carries the process's standard error output.
    """
    target = working_copy or repo_url
    # Build a fresh list so the caller's argument list is left untouched.
    full_cmd = [*cmd, target] if target else list(cmd)
    result = subprocess.run(
        full_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    if result.returncode != 0:
        raise Exception(f"SVN command failed: {result.stderr}")
    return result.stdout
def get_svn_log(repo_url, start_rev=0, limit=100):
    """Fetch one page of the verbose SVN log as XML.

    The revision range ``start_rev:HEAD`` asks for entries in ascending
    order beginning at ``start_rev``; ``--limit`` caps how many entries
    are returned. (``svn log`` has no ``--start`` option, so the range
    must be expressed through ``--revision``.)

    Args:
        repo_url: URL of the repository (or path inside it) to query.
        start_rev: First revision of the requested range (default 0).
        limit: Maximum number of log entries to return (default 100).

    Returns:
        The XML text printed by ``svn log --xml --verbose``.
    """
    cmd = [
        'svn', 'log', '--xml', '--verbose',
        '--revision', f'{start_rev}:HEAD',
        '--limit', str(limit),
    ]
    return run_svn_command(cmd, repo_url=repo_url)
def get_changed_files_in_commit(log_xml):
    """Extract the changed .c / .h paths from SVN log XML output.

    Args:
        log_xml: XML text whose root element contains ``logentry``
            elements, each with ``paths/path`` children.

    Returns:
        A list of ``(revision, file_path)`` tuples in document order,
        where ``revision`` is the ``revision`` attribute of the log entry
        (a string) and ``file_path`` is the text of a ``path`` element
        ending in ``.c`` or ``.h``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``log_xml`` is not
            well-formed XML.
    """
    root = ET.fromstring(log_xml)
    return [
        (logentry.get('revision'), path.text)
        for logentry in root.findall('logentry')
        for path in logentry.findall('paths/path')
        # An empty <path/> element has text None; treat it as "no match"
        # instead of crashing on None.endswith().
        if (path.text or '').endswith(('.c', '.h'))
    ]
def get_all_commits(repo_url):
    """Collect the .c / .h change history of an SVN repository.

    The log is read in pages of 100 entries. Paging is driven by the
    revisions present in each page, not by whether the page happened to
    contain .c / .h changes, so a stretch of unrelated commits does not
    end the scan early.

    Args:
        repo_url: URL of the repository (or path inside it) to scan.

    Returns:
        A list of ``(revision, file_path)`` tuples as produced by
        ``get_changed_files_in_commit``, each revision reported once.
    """
    limit = 100  # log entries requested per page
    start_rev = 0
    last_seen = -1  # highest revision already processed
    all_changed_files = []
    while True:
        log_xml = get_svn_log(repo_url, start_rev, limit)
        revisions = [
            int(entry.get('revision'))
            for entry in ET.fromstring(log_xml).findall('logentry')
        ]
        newest = max(revisions, default=last_seen)
        if newest <= last_seen:
            break  # the page brought nothing new: history is exhausted
        # Pages overlap by one revision (see below), so keep only entries
        # that are newer than what has already been counted.
        all_changed_files.extend(
            (revision, file_path)
            for revision, file_path in get_changed_files_in_commit(log_xml)
            if int(revision) > last_seen
        )
        last_seen = newest
        if len(revisions) < limit:
            break  # a short page is the last one
        # Restart AT the newest revision rather than one past it, so the
        # next request never names a revision that may not exist yet.
        # NOTE(review): assumes get_svn_log returns entries oldest-first
        # starting at start_rev - confirm against its implementation.
        start_rev = newest
    return all_changed_files
def count_file_changes(repo_url):
    """Tally how often each .c / .h file shows up in the SVN history.

    Args:
        repo_url: URL of the repository handed to ``get_all_commits``.

    Returns:
        A dict mapping file path to the number of recorded changes.
    """
    tally = {}
    for _revision, changed_path in get_all_commits(repo_url):
        # Every (revision, path) record counts as one change of that path.
        tally[changed_path] = tally.get(changed_path, 0) + 1
    return tally
def save_to_excel(file_changes, output_file='file_changes.xlsx'):
    """Export the file-change counts to an Excel file.

    Args:
        file_changes: Mapping of file path to change count.
        output_file: Path of the workbook to create.
    """
    # Turn the mapping into (path, count) rows with named columns.
    records = list(file_changes.items())
    frame = pd.DataFrame(records, columns=['File', 'Change Count'])
    # Save as Excel via openpyxl, leaving out the index column.
    frame.to_excel(output_file, index=False, engine='openpyxl')
    print(f"File changes have been saved to {output_file}")
def main():
    """Count .c / .h changes in the remote SVN repository and export them."""
    # URL of the remote SVN repository to analyse.
    repo_url = 'https://your-svn-repository-url/path/to/repository'
    # Gather the statistics, then write them to the Excel workbook.
    save_to_excel(count_file_changes(repo_url))


if __name__ == '__main__':
    main()