@nerewarin
Created October 23, 2025 16:49
Repository fetcher v3: Clean code organization, linting, and type safety
# Copy of the original code from https://codeshare.io/2jk6DM, fixed and prettified (but not linted yet)
# Goal: get the top-5 starred repositories from the Google GitHub account (https://github.com/google/api)
#
# Result: a printed Python dictionary with the top-5 repositories, where the key is the repository name
# and the value is its star count. The result should be sorted in descending order of stars.
#
# Sample of a potential response from GitHub:
# [{
# "id": 1,
# "repo_name": "google_1",
# "stars": 10,
# "link": "https://"
# },
# {
# "id": 2,
# "repo_name": "google_2",
# "stars": 4,
# "link": "https://"
# },
# {
# "id": 3,
# "repo_name": "google_3",
# "stars": null,
# "link": "https://"
# }]
# The number of pages is known in advance - let's assume 42:
# https://github.com/google/api?page=1
# https://github.com/google/api?page=2
# ...
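# Illustrative example of the final result shape described above (values taken from the
# sample response; the actual numbers depend on the live data):
#     {"google_1": 10, "google_2": 4, ...}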
import argparse
import asyncio
import logging
import os
from typing import Any, Dict, List, Protocol

import uvloop

# Global logger - will be reconfigured in main
logger = logging.getLogger(__name__)

# Constants
PAGES_TO_OBSERVE = list(range(1, 43))  # 42 pages
_PARALLELISM = 10


def setup_logging(level: str | None = None) -> logging.Logger:
    """Set up logging configuration with the level from the CLI or environment."""
    if level is None:
        level = os.getenv("LOG_LEVEL", "INFO").upper()
    logging.basicConfig(
        level=getattr(logging, level, logging.INFO),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    return logging.getLogger(__name__)


class RepositoryService(Protocol):
    """Protocol for a repository data service"""

    async def get_repositories(self, page: int) -> List[Dict[str, Any]]:
        """Get repositories for a given page"""
        ...


class MockRepositoryService:
    """Mock implementation of RepositoryService that returns fake GitHub-like data"""

    async def get_repositories(self, page: int) -> List[Dict[str, Any]]:
        """Mock implementation that returns fake repository data"""
        # Simulate network delay
        await asyncio.sleep(0.2)  # Fixed delay for predictable tests
        # Test error handling: always raise a timeout error for page 1
        if page == 1:
            raise asyncio.TimeoutError(f"Timeout error for page {page}")
        # Generate fake repositories for this page
        repos = []
        for i in range(10):  # 10 repos per page
            repo_id = (page - 1) * 10 + i + 1
            # Guarantee at least one None stars value per page:
            # use the last repo in each page for None stars
            if i == 9:  # Last repo in page
                stars = None
            else:
                stars = repo_id * 10  # Deterministic stars based on ID
            repo = {
                "id": repo_id,
                "repo_name": f"google_repo_{repo_id}",
                "stars": stars,
                "link": f"https://github.com/google/repo_{repo_id}",
            }
            repos.append(repo)
        return repos
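

# A minimal sketch of an HTTP-backed service, assuming the endpoint from the header
# comment returns JSON in the sample shape shown above. The class name, URL, query
# parameter, and response fields are assumptions for illustration, not the real GitHub API.
class HttpRepositoryService:
    """RepositoryService backed by HTTP requests (illustrative sketch)."""

    def __init__(self, base_url: str = "https://github.com/google/api") -> None:
        self._base_url = base_url

    async def get_repositories(self, page: int) -> List[Dict[str, Any]]:
        import aiohttp  # Local import so the mock-only path does not require aiohttp

        async with aiohttp.ClientSession() as session:
            async with session.get(
                self._base_url,
                params={"page": page},
                timeout=aiohttp.ClientTimeout(total=10),
            ) as resp:
                resp.raise_for_status()
                # Expected: a list of {"id", "repo_name", "stars", "link"} dicts
                return await resp.json()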


async def _task(
    page: int,
    service: RepositoryService,
    max_tries: int = 5,
    initial_delay: float = 0.3,
    multiplier: float = 2,
) -> List[Dict[str, Any]]:
    """Fetch one page, retrying with exponential backoff on any service error."""
    if multiplier <= 1:
        raise ValueError(f"multiplier must be greater than 1 but {multiplier=} passed!")
    last_exp: BaseException | None = None
    while max_tries:
        max_tries -= 1
        logger.debug(f"requesting page {page}...")
        try:
            batch_result = await service.get_repositories(page)
        except Exception as exc:  # Catch any service errors
            last_exp = exc
            logger.warning(f"Failed to fetch page {page}, retrying... (attempts left: {max_tries})")
            await asyncio.sleep(initial_delay)
            initial_delay *= multiplier
        else:
            break
    else:
        # The while-else runs only if every attempt failed (no break happened)
        logger.error(f"Failed to fetch page {page} after all retries", exc_info=last_exp)
        if last_exp is not None:
            raise last_exp
        raise RuntimeError(f"Failed to fetch page {page} after all retries")
    return batch_result
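
# Illustrative usage of _task on its own (page 1 always times out in MockRepositoryService,
# so this exercises the retry/backoff path and ultimately re-raises the TimeoutError):
#     asyncio.run(_task(1, MockRepositoryService(), max_tries=2, initial_delay=0.01))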


def _flatten(list_of_lists: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Flatten a list of per-page result lists into a single list."""
    res: List[Dict[str, Any]] = []
    for lst in list_of_lists:
        res += lst
    return res


async def _get_pages(
    pages_to_observe: List[int], service: RepositoryService
) -> List[Dict[str, Any]]:
    # Create one fetch task per page
    tasks = [_task(page, service) for page in pages_to_observe]
    # NOTE: concurrency is currently unbounded; see the semaphore sketch below
    # for a way to cap it at _PARALLELISM.
    # Handle errors with return_exceptions=True to prevent one failure from stopping all tasks
    unflatten_results: List[List[Dict[str, Any]] | BaseException] = await asyncio.gather(
        *tasks, return_exceptions=True
    )
    # Filter out exceptions and log them
    valid_results: List[List[Dict[str, Any]]] = []
    for i, result in enumerate(unflatten_results):
        if isinstance(result, BaseException):
            logger.error(f"Task {i} failed: {result}")
        else:
            valid_results.append(result)
    return _flatten(valid_results)
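

# _PARALLELISM is declared above but never enforced by _get_pages. A minimal sketch of
# capping concurrency with a semaphore (a drop-in alternative to the unbounded gather;
# the helper name is hypothetical):
async def _get_pages_bounded(
    pages_to_observe: List[int], service: RepositoryService
) -> List[Dict[str, Any]]:
    semaphore = asyncio.Semaphore(_PARALLELISM)

    async def bounded_task(page: int) -> List[Dict[str, Any]]:
        async with semaphore:  # At most _PARALLELISM pages are in flight at once
            return await _task(page, service)

    results = await asyncio.gather(
        *(bounded_task(page) for page in pages_to_observe), return_exceptions=True
    )
    # Skip failed pages, as _get_pages does
    valid = [r for r in results if not isinstance(r, BaseException)]
    return _flatten(valid)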


async def main(pages_to_observe: List[int], service: RepositoryService) -> List[Dict[str, Any]]:
    """
    1. Load all pages in parallel
    2. Group the results
    3. Order the flattened list of repositories by star count, descending
    4. Take the top 5 entries
    5. Return them as a list of dicts
    """
    # Get the repositories from all parallel requests (failed pages are skipped)
    repos: List[Dict[str, Any]] = await _get_pages(pages_to_observe, service)
    ordered_repos = sorted(
        repos,
        key=lambda x: x.get("stars", 0) or 0,  # Treat missing/None stars as 0
        reverse=True,
    )
    return ordered_repos[:5]
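

# The header comment describes the result as a "repository name -> star count" dictionary.
# A minimal helper for converting the list returned by main() into that shape (the helper
# name is an illustrative addition; dicts preserve insertion order, so the descending sort
# from main() is kept):
def to_stars_dict(repos: List[Dict[str, Any]]) -> Dict[str, int]:
    return {repo["repo_name"]: repo["stars"] or 0 for repo in repos}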


def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(
        description="Get the top 5 starred repositories from Google's GitHub account",
        epilog="""
Examples:
  %(prog)s                       # Run with default INFO logging
  %(prog)s --log-level DEBUG     # Show debug messages including page requests
  %(prog)s --log-level ERROR     # Only show errors
  LOG_LEVEL=DEBUG %(prog)s       # Use environment variable for log level

Environment Variables:
  LOG_LEVEL    Set default logging level (DEBUG, INFO, WARNING, ERROR)
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default=os.getenv("LOG_LEVEL", "INFO"),
        help="Set logging level (default: INFO, can also use LOG_LEVEL env var)",
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    # Set up logging with the CLI level
    setup_logging(args.log_level)
    logger.info("Starting repository fetcher")
    uvloop.install()
    service = MockRepositoryService()
    result = asyncio.run(main(PAGES_TO_OBSERVE, service))
    logger.info("Top 5 repositories:")
    for i, repo in enumerate(result, 1):
        logger.info(f"{i}. {repo}")