随机从项目中取3500行代码

随机从项目中取3500行代码

import os
import random


def collect_code_files(root_dir, extensions):
    """Return the paths of all files under ``root_dir`` with a matching extension.

    Args:
        root_dir: Directory that is walked recursively with ``os.walk``.
        extensions: Iterable of filename suffixes (e.g. ``['.py', '.c']``).
            It is consumed exactly once, so generators are accepted too.

    Returns:
        A list of paths (``root_dir`` joined with the relative location of
        each match), in the order ``os.walk`` yields them. Empty when nothing
        matches or ``root_dir`` cannot be walked.
    """
    # Materialise once: ``str.endswith`` accepts a tuple of suffixes, and a
    # one-shot iterable would otherwise be exhausted after the first file.
    suffixes = tuple(extensions)
    return [
        os.path.join(subdir, file)
        for subdir, _, files in os.walk(root_dir)
        for file in files
        if file.endswith(suffixes)
    ]


def read_all_lines(code_files):
    """Read every line of every file in ``code_files`` into one flat list.

    Files are opened as UTF-8 text and undecodable bytes are dropped
    (``errors='ignore'``).

    Args:
        code_files: Iterable of file paths to read, in order.

    Returns:
        A list of lines, each ending in ``'\\n'``, in file order.
    """
    all_lines = []
    for file in code_files:
        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
        # A file whose last line has no trailing newline would otherwise be
        # glued to whatever line follows it once the lines are written out
        # with ``writelines``.
        if lines and not lines[-1].endswith('\n'):
            lines[-1] += '\n'
        all_lines.extend(lines)
    return all_lines


def get_random_lines(all_lines, num_lines):
    """Pick ``num_lines`` distinct positions from ``all_lines`` at random.

    Sampling is done without replacement via ``random.sample``, which raises
    ``ValueError`` when ``num_lines`` exceeds ``len(all_lines)``.
    """
    picked = random.sample(all_lines, k=num_lines)
    return picked


def main():
    """Sample 3500 random lines from a project's source files into a text file.

    Walks the configured project directory, reads every line of every file
    with a listed extension, and writes 3500 randomly chosen lines to
    ``random_3500_lines.txt``. If fewer than 3500 lines exist, nothing is
    written and a message is printed instead.
    """
    # Which suffixes count as source code, and where the project lives.
    extensions = ['.java', '.py', '.cpp', '.c', '.cs', '.h']
    root_dir = 'path/to/your/project'

    # Gather every line from every matching file under the project root.
    all_lines = read_all_lines(collect_code_files(root_dir, extensions))

    if len(all_lines) >= 3500:
        # Enough material: draw the sample and persist it.
        random_lines = get_random_lines(all_lines, 3500)
        with open('random_3500_lines.txt', 'w', encoding='utf-8') as output_file:
            output_file.writelines(random_lines)
        print(f"3500 random lines have been written to random_3500_lines.txt")
    else:
        # Too few lines to sample from; report how many were found.
        print(f"Not enough lines in the project. Only found {len(all_lines)} lines.")

# Run the sampler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

随机从项目中取3500行代码,尽量取连续的代码行,每段连续不超过100行

import os
import random

def collect_code_files(root_dir, extensions):
    """Collect the paths of all files under ``root_dir`` with a given extension.

    Args:
        root_dir: Directory to search recursively (via ``os.walk``).
        extensions: Iterable of filename suffixes such as ``'.py'``. It is
            read once up front, so a generator works as well as a list.

    Returns:
        List of matching file paths in ``os.walk`` order; empty if none match.
    """
    # Snapshot the suffixes as a tuple: ``str.endswith`` takes a tuple
    # directly, and a one-shot iterable must not be re-iterated per file.
    suffixes = tuple(extensions)
    code_files = []
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(suffixes):
                continue
            code_files.append(os.path.join(subdir, file))
    return code_files

def read_and_chunk_file(file_path, max_chunk_size=100):
    """Read a file and split its lines into consecutive chunks.

    The file is opened as UTF-8 text and undecodable bytes are dropped
    (``errors='ignore'``).

    Args:
        file_path: Path of the file to read.
        max_chunk_size: Maximum number of lines per chunk; must be >= 1.

    Returns:
        A list of chunks, each a list of at most ``max_chunk_size``
        consecutive lines ending in ``'\\n'``. Empty for an empty file.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1.
    """
    if max_chunk_size < 1:
        # A zero step already made ``range`` raise ValueError; a negative one
        # silently produced no chunks at all. Reject both explicitly.
        raise ValueError("max_chunk_size must be at least 1")

    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    # Terminate an unterminated final line so that it is not fused with the
    # first line of whichever chunk is written after it.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'

    return [
        lines[start:start + max_chunk_size]
        for start in range(0, len(lines), max_chunk_size)
    ]

def main():
    """Write up to 3500 lines, taken in random runs of at most 100, to a file.

    Every matching source file under the project root is split into chunks of
    at most ``max_chunk_size`` consecutive lines. The chunks are shuffled and
    appended in that order until ``total_lines_needed`` lines are gathered;
    the last chunk is truncated if it would overshoot. The result is written
    to ``random_3500_lines.txt`` and the number of lines actually written is
    reported.
    """
    # Define the root directory of your project and the file extensions to include
    root_dir = 'path/to/your/project'
    extensions = ['.java', '.py', '.cpp', '.c', '.cs', '.h']
    max_chunk_size = 100
    total_lines_needed = 3500
    output_path = 'random_3500_lines.txt'

    # Collect all code files in the project
    code_files = collect_code_files(root_dir, extensions)

    # Read and chunk all code files
    all_chunks = []
    for file in code_files:
        all_chunks.extend(read_and_chunk_file(file, max_chunk_size))

    # Shuffle whole chunks so lines inside a chunk stay consecutive
    random.shuffle(all_chunks)

    # Take chunks until the quota is met, truncating the last one if needed
    selected_lines = []
    for chunk in all_chunks:
        remaining_lines_needed = total_lines_needed - len(selected_lines)
        if remaining_lines_needed <= 0:
            break
        selected_lines.extend(chunk[:remaining_lines_needed])

    # Write the selected lines to a new file
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.writelines(selected_lines)

    # Report what was really written: the project may hold fewer lines than
    # requested, in which case claiming the full quota would be wrong.
    if len(selected_lines) < total_lines_needed:
        print(
            f"Not enough lines in the project. Only {len(selected_lines)} of "
            f"{total_lines_needed} lines have been written to {output_path}"
        )
    else:
        print(f"{total_lines_needed} random lines have been written to {output_path}")

# Entry point: run the chunked sampler only when executed directly as a script.
if __name__ == "__main__":
    main()

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值