Commit

fix: fix data and test cases

terryyz committed May 2, 2024
1 parent 6e2058d commit b586cb7

Showing 295 changed files with 1,777 additions and 1,828 deletions.
14 changes: 7 additions & 7 deletions data/clean/f_1708_hanhu.py
@@ -1,13 +1,6 @@
import random
import string
from django.conf import settings
from django.http import HttpResponse
# Configure Django settings if not already configured
if not settings.configured:
settings.configure(
DEFAULT_CHARSET='utf-8',
SECRET_KEY='a-very-secret-key',
)


def f_1709(request, session_expire_time):
@@ -64,6 +57,13 @@ def f_1709(request, session_expire_time):
import unittest
from unittest.mock import patch
from django.http import HttpRequest
from django.conf import settings
# Configure Django settings if not already configured
if not settings.configured:
settings.configure(
DEFAULT_CHARSET='utf-8',
SECRET_KEY='a-very-secret-key',
)

class TestCases(unittest.TestCase):

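This commit repeatedly moves the settings.configure() guard out of the task module and next to the test imports, so importing the function no longer configures Django as an import-time side effect. A minimal sketch of the resulting pattern, reusing the charset and key from the diff above; the test class and assertion are illustrative, not part of the repository:

import unittest

from django.conf import settings
from django.http import HttpResponse

# Configure Django lazily, only when the test module that needs it is loaded.
if not settings.configured:
    settings.configure(
        DEFAULT_CHARSET='utf-8',
        SECRET_KEY='a-very-secret-key',
    )


class SettingsGuardTest(unittest.TestCase):
    def test_charset_comes_from_settings(self):
        # HttpResponse resolves its charset from the configured settings.
        self.assertEqual(HttpResponse("ok").charset, 'utf-8')


if __name__ == '__main__':
    unittest.main()
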
6 changes: 3 additions & 3 deletions data/clean/f_1709_hanhu.py
@@ -2,9 +2,6 @@
import base64
import binascii
from django.http import HttpResponseBadRequest, HttpResponse
from django.conf import settings
if not settings.configured:
settings.configure()

def f_1710(data):
"""
@@ -69,6 +66,9 @@ def f_1710(data):
import unittest
from unittest.mock import patch
from django.http import HttpResponseBadRequest, HttpResponse
from django.conf import settings
if not settings.configured:
settings.configure()

class TestCases(unittest.TestCase):

6 changes: 3 additions & 3 deletions data/clean/f_1710_hanhu.py
@@ -1,9 +1,6 @@
import csv
import io
from django.http import HttpRequest, FileResponse
from django.conf import settings
if not settings.configured:
settings.configure()

def f_1711(request, header, csv_data):
"""
@@ -54,6 +51,9 @@ def f_1711(request, header, csv_data):
import unittest
from unittest.mock import patch
from django.http import HttpRequest, FileResponse
from django.conf import settings
if not settings.configured:
settings.configure()

class TestCases(unittest.TestCase):

4 changes: 2 additions & 2 deletions data/clean/f_1711_hanhu.py
@@ -2,8 +2,6 @@
import io
from django.http import FileResponse, HttpRequest
from django.conf import settings
if not settings.configured:
settings.configure()

def f_1712(request, file_paths):
"""
@@ -53,6 +51,8 @@ def f_1712(request, file_paths):
import unittest
from unittest.mock import MagicMock, patch
from django.http import HttpRequest, FileResponse
if not settings.configured:
settings.configure()

class TestCases(unittest.TestCase):

2 changes: 1 addition & 1 deletion data/clean/f_207_wending_chien_minor.py
@@ -15,7 +15,7 @@ def f_207(data):
Returns:
DataFrame: A pandas DataFrame where data is normalized, with an additional column 'Average' representing the
mean of each row.
Axes: A matplotlib Axes object showing a plot of the average values across the dataset.
Axes: A matplotlib Axes object showing a bar subplot of the average values across the dataset.
Requirements:
- pandas
16 changes: 7 additions & 9 deletions data/clean/f_210_wending_chien_edit.py
@@ -7,7 +7,7 @@ def f_210(log_file):
"""
Extracts logging information such as message type, timestamp, and the message itself from a log file and
stores the data in a CSV format. This utility is ideal for converting plain text logs into a more s
tructured format that can be easily analyzed.
tructured format that can be easily analyzed. The log is in the format of 'TYPE: [TIMESTAMP (YYYY-MM-DD HH:MM:SS)] - MESSAGE'.
Parameters:
log_file (str): The file path to the log file that needs to be parsed.
@@ -67,20 +67,18 @@ def run_tests():


class TestCases(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.sample_log_file = 'test_server.log'
with open(cls.sample_log_file, 'w') as log_file:
def setUp(self):
self.sample_log_file = 'test_server.log'
with open(self.sample_log_file, 'w') as log_file:
log_file.write("ERROR: [2023-03-23 15:00:00] - Sample error message\n")
log_file.write("INFO: [2023-03-23 15:05:00] - Sample info message\n")

@classmethod
def tearDownClass(cls):
def tearDown(self):
# Clean up: Remove the generated CSV file if it exists
if os.path.exists('log_data.csv'):
os.remove('log_data.csv')
if os.path.exists(cls.sample_log_file):
os.remove(cls.sample_log_file)
if os.path.exists(self.sample_log_file):
os.remove(self.sample_log_file)

def test_log_to_csv_content(self):
expected_df = pd.DataFrame({
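The fixture change above replaces class-level setUpClass/tearDownClass with per-test setUp/tearDown, so every test writes a fresh test_server.log and cleans up its own artifacts. A self-contained sketch of that pattern; the file contents mirror the diff, while the test method itself is illustrative:

import os
import unittest


class LogFixtureExample(unittest.TestCase):
    def setUp(self):
        # Recreated before every test, not once per class.
        self.sample_log_file = 'test_server.log'
        with open(self.sample_log_file, 'w') as log_file:
            log_file.write("ERROR: [2023-03-23 15:00:00] - Sample error message\n")
            log_file.write("INFO: [2023-03-23 15:05:00] - Sample info message\n")

    def tearDown(self):
        # Remove per-test artifacts so no state leaks between tests.
        for path in ('log_data.csv', self.sample_log_file):
            if os.path.exists(path):
                os.remove(path)

    def test_sample_log_is_written(self):
        self.assertTrue(os.path.exists(self.sample_log_file))


if __name__ == '__main__':
    unittest.main()
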
2 changes: 1 addition & 1 deletion data/clean/f_211_wending_chien_edit.py
@@ -5,7 +5,7 @@

def f_211(text, rwidth=0.8):
"""
Analyzes and visualizes the distribution of word lengths in a text. The function generates a histogram,
Analyzes and visualizes the distribution of word lengths in a text. The function generates a histogram subplot,
which facilitates the understanding of how word lengths vary within the provided text.
Parameters:
3 changes: 3 additions & 0 deletions data/clean/f_217_ratna_edit.py
@@ -18,6 +18,9 @@ def f_217(csv_url, sort_by_column="title"):
- requests
- io.StringIO
Raises:
Exception: If the response status code is not 200.
Example:
>>> f_217("http://example.com/data.csv", sort_by_column="title")
id title price
7 changes: 4 additions & 3 deletions data/clean/f_217_wending_chien_edit.py
@@ -6,7 +6,7 @@
VEGETABLES = ['Carrot', 'Potato', 'Tomato', 'Cabbage', 'Spinach']


def f_217(vegetable_dict):
def f_217(vegetable_dict, seed=0):
"""
Calculate statistics for the vegetables preferred by people listed in the input dictionary.
The function reverses the dictionary to map vegetables to people and assigns random counts to these vegetables.
@@ -17,7 +17,8 @@ def f_217(vegetable_dict):
Parameters:
vegetable_dict (dict): A dictionary mapping people's names to their preferred vegetables.
seed (int): An integer value to seed the random number generator. Defaults to 0.
Returns:
DataFrame: Returns a DataFrame with columns for vegetable names, their random counts,
and their percentage occurrence within the total counts.
@@ -35,7 +36,7 @@ def f_217(vegetable_dict):
Potato 7 46.666667
Tomato 1 6.666667
"""
random.seed(0)
random.seed(seed)
# Create a counter for vegetables based on reversed dictionary
reversed_dict = {v: k for k, v in vegetable_dict.items()}
vegetable_counter = collections.Counter({vegetable: random.randint(1, 10) for vegetable in reversed_dict.keys()})
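Promoting the seed to a parameter (default 0) keeps the documented example reproducible while letting callers vary the randomness. A stripped-down sketch of the seeding behaviour that returns only the counter rather than the full DataFrame; the sample dictionary is illustrative:

import collections
import random


def vegetable_counts(vegetable_dict, seed=0):
    # The seed now comes from the caller instead of a hard-coded random.seed(0).
    random.seed(seed)
    reversed_dict = {v: k for k, v in vegetable_dict.items()}
    return collections.Counter(
        {vegetable: random.randint(1, 10) for vegetable in reversed_dict}
    )


prefs = {'John': 'Carrot', 'Alice': 'Potato', 'Bob': 'Spinach'}
# The same seed reproduces the same counts; a different seed gives a different draw.
assert vegetable_counts(prefs, seed=0) == vegetable_counts(prefs, seed=0)
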
2 changes: 1 addition & 1 deletion data/clean/f_219_wending_chien_edit.py
@@ -8,7 +8,7 @@ def f_219(df):
The like ratio for each video is calculated by dividing the number of likes by the number of views.
This function generates a bar plot of the like ratios for these specific videos.
If the DataFrame is empty, lacks the required columns, or contains no titles matching the criteria,
an empty plot is returned.
an empty subplot is returned.
Parameters:
df (DataFrame): A DataFrame containing video data with columns 'Title', 'Views', and 'Likes'.
2 changes: 1 addition & 1 deletion data/clean/f_220_wending_chien_edit.py
@@ -5,7 +5,7 @@

def f_220(df):
"""
Extracts articles whose titles contain specific keywords ("like" or "what") from a DataFrame and analyzes
Extracts articles whose titles contain specific case-insensitive keywords ("like" or "what") from a DataFrame and analyzes
the frequency of each word in the content of these articles, excluding punctuation.
Parameters:
5 changes: 2 additions & 3 deletions data/clean/f_222_haolan_ratna_okay.py
@@ -3,9 +3,6 @@
import random
import time

if not settings.configured:
settings.configure(DEBUG=True)

def f_222(data, min_delay, max_delay):
"""
After a random delay, generate a Django HttpResponse with JSON data to simulate the latency of the network.
@@ -47,6 +44,8 @@ def f_222(data, min_delay, max_delay):
import json
import random

if not settings.configured:
settings.configure(DEBUG=True)

class TestCases(unittest.TestCase):
def test_case_1(self):
2 changes: 1 addition & 1 deletion data/clean/f_222_wending_chien_edit.py
@@ -5,7 +5,7 @@

def f_222(df):
"""
Analyzes articles by their titles for specific keywords ("how" or "what"), vectorizes the content using
Analyzes articles by their titles for specific case-insensitive keywords ("how" or "what"), vectorizes the content using
CountVectorizer, and groups them into clusters using KMeans clustering. This function is intended for basic
content analysis and clustering to understand common themes or topics among articles asking questions starting
with "how" or "what".
7 changes: 3 additions & 4 deletions data/clean/f_223_haolan_ratna_edit.py
@@ -1,10 +1,6 @@
from django.http import HttpResponse
from django.conf import settings
import uuid

if not settings.configured:
settings.configure(DEBUG=True)

def f_223(data):
"""
Create a Django HttpResponse with JSON data, and include a UUID in the HTTP headers to track requests.
@@ -38,6 +34,9 @@ def f_223(data):

import unittest
import json
from django.conf import settings
if not settings.configured:
settings.configure(DEBUG=True)

class TestCases(unittest.TestCase):

3 changes: 2 additions & 1 deletion data/clean/f_224_haolan_ratna_edit.py
@@ -5,11 +5,12 @@
def f_224(data_url: str) -> list:
"""
Fetch data from a specific URL and extract all names from the JSON-formatted data that are not enclosed by square brackets.
The function does not raise an exception for any particular HTTP status code.
Note:
- The function uses regular expressions to search for names in the fetched data. Names that are inside square
brackets are ignored.
- The function will return "Invalid url input" if the names cannot be extracted from the url.
- The function will return "Invalid url input" if any exception is raised during the request.
Parameters:
- data_url (str): The URL from which to fetch data.
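The clarified docstring states that any exception raised during the request yields the literal string "Invalid url input", rather than propagating a status-code error. A hedged sketch of that control flow; the helper name and the regular expression are assumptions for illustration, not the repository's exact implementation:

import re

import requests


def fetch_names(data_url: str):
    try:
        text = requests.get(data_url, timeout=5).text
        # Names inside square brackets are ignored.
        text = re.sub(r'\[.*?\]', '', text)
        return re.findall(r'[A-Z][a-z]+', text)
    except Exception:
        # Any failure while fetching or parsing maps to this string.
        return "Invalid url input"
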
2 changes: 1 addition & 1 deletion data/clean/f_225_wending_chien_edit.py
@@ -27,7 +27,7 @@ def f_225(rows, columns):
- set: Sets of random size (1 to 5) containing unique integers from 0 to 9.
Returns:
pd.DataFrame: A DataFrame with the specified number of rows and columns containing randomly generated data.
pd.DataFrame: A DataFrame with the specified number of rows and columns named 'col0', 'col1', etc., containing randomly generated data.
Requirements:
- pandas
2 changes: 1 addition & 1 deletion data/clean/f_227_haolan_ratna_edit.py
@@ -4,7 +4,7 @@

def f_227(url):
"""
Open a web page in the default web browser.
Open a web page in the default web browser in a background process.
Parameters:
url (str): The URL of the webpage to be opened.
2 changes: 1 addition & 1 deletion data/clean/f_230_haolan_ratna_edit.py
@@ -9,7 +9,7 @@

def f_225(input_data=None, smtp_server=SMTP_SERVER, smtp_port=SMTP_PORT, email_address=EMAIL_ADDRESS, email_password=EMAIL_PASSWORD, smtp=None):
"""
Extract recepient email address and names from JSON-formatted string and send the names in an email.
Extract recepient email address and names from JSON-formatted string and send the names in an email. The sent message should be in the format 'Subject: Extracted Names\n\nName1\nName2\n...'.
Parameters:
input_data (str): JSON-formatted string containing the recipient email address and the list of names.
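The added sentence pins down the exact message format sent over SMTP. A small sketch of assembling that body from JSON input; the input shape used here, {"names": [...]}, is an assumption for illustration:

import json


def build_message(input_data: str) -> str:
    # Produces 'Subject: Extracted Names\n\nName1\nName2\n...'
    names = json.loads(input_data).get("names", [])
    return "Subject: Extracted Names\n\n" + "\n".join(names)


print(repr(build_message('{"names": ["Josie", "Mugsy"]}')))
# 'Subject: Extracted Names\n\nJosie\nMugsy'
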
37 changes: 26 additions & 11 deletions data/clean/f_235_haolan_ratna_edit.py
@@ -4,7 +4,7 @@

def f_235(url, destination_directory, headers=None):
"""
Download a zip file from a URL, extract its contents to the specified directory, and return the list of extracted files.
Download and keep a zip file from a URL, extract its contents to the specified directory, and return the list of extracted files.
Parameters:
url (str): The URL of the zip file to download.
@@ -72,14 +72,17 @@ def test_download_and_extract(self, mock_open, mock_join, mock_basename, mock_li

# Mock other functions
mock_basename.return_value = "data.zip"
mock_listdir.return_value = ['file1.txt', 'file2.csv']
mock_zip_instance = MagicMock()
zip_contents = ['file1.txt', 'file2.csv'] # Files in the zip
mock_zip_instance.namelist.return_value = zip_contents
mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

# Call the function
extracted_files = f_235(MOCK_URL, MOCK_DESTINATION_DIR)
# Assertions
mock_requests_get.assert_called_once_with(MOCK_URL, headers={'accept': 'application/octet-stream'})
mock_open.assert_called_once_with(os.path.join(MOCK_DESTINATION_DIR, 'data.zip'), 'wb')
self.assertEqual(extracted_files, ['file1.txt', 'file2.csv'])
self.assertEqual(zip_contents, mock_zip_instance.namelist())

@patch('requests.get')
@patch('zipfile.ZipFile.extract')
@@ -96,14 +99,17 @@ def test_2(self, mock_open, mock_join, mock_basename, mock_listdir, mock_zipfile

# Mock other functions
mock_basename.return_value = "data.zip"
mock_listdir.return_value = ['file1.txt', 'file2.csv', 'file3.td']
mock_zip_instance = MagicMock()
zip_contents = ['file1.txt', 'file2.csv', 'file3.td']
mock_zip_instance.namelist.return_value = zip_contents
mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

# Call the function
extracted_files = f_235(MOCK_URL, MOCK_DESTINATION_DIR)
# Assertions
mock_requests_get.assert_called_once_with(MOCK_URL, headers={'accept': 'application/octet-stream'})
mock_open.assert_called_once_with(os.path.join(MOCK_DESTINATION_DIR, 'data.zip'), 'wb')
self.assertEqual(extracted_files, ['file1.txt', 'file2.csv', 'file3.td'])
self.assertEqual(zip_contents, mock_zip_instance.namelist())

@patch('requests.get')
@patch('zipfile.ZipFile.extract')
@@ -120,14 +126,17 @@ def test_3(self, mock_open, mock_join, mock_basename, mock_listdir, mock_zipfile

# Mock other functions
mock_basename.return_value = "data.zip"
mock_listdir.return_value = ['file1.txt']
mock_zip_instance = MagicMock()
zip_contents = ['file1.txt']
mock_zip_instance.namelist.return_value = zip_contents
mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

# Call the function
extracted_files = f_235(MOCK_URL, MOCK_DESTINATION_DIR)
# Assertions
mock_requests_get.assert_called_once_with(MOCK_URL, headers={'accept': 'application/octet-stream'})
mock_open.assert_called_once_with(os.path.join(MOCK_DESTINATION_DIR, 'data.zip'), 'wb')
self.assertEqual(extracted_files, ['file1.txt'])
self.assertEqual(zip_contents, mock_zip_instance.namelist())


@patch('requests.get')
@@ -145,14 +154,17 @@ def test_4(self, mock_open, mock_join, mock_basename, mock_listdir, mock_zipfile

# Mock other functions
mock_basename.return_value = "data_download.zip"
mock_listdir.return_value = ['file1.txt', 'file2.xlsx']
mock_zip_instance = MagicMock()
zip_contents = ['file1.txt', 'file2.xlsx']
mock_zip_instance.namelist.return_value = zip_contents
mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

# Call the function
extracted_files = f_235(MOCK_URL, MOCK_DESTINATION_DIR)
# Assertions
mock_requests_get.assert_called_once_with(MOCK_URL, headers={'accept': 'application/octet-stream'})
mock_open.assert_called_once_with(os.path.join(MOCK_DESTINATION_DIR, 'data_download.zip'), 'wb')
self.assertEqual(extracted_files, ['file1.txt', 'file2.xlsx'])
self.assertEqual(zip_contents, mock_zip_instance.namelist())


@patch('requests.get')
@@ -170,14 +182,17 @@ def test_5(self, mock_open, mock_join, mock_basename, mock_listdir, mock_zipfile

# Mock other functions
mock_basename.return_value = "data_download.zip"
mock_listdir.return_value = []
mock_zip_instance = MagicMock()
zip_contents = []
mock_zip_instance.namelist.return_value = zip_contents
mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

# Call the function
extracted_files = f_235(MOCK_URL, MOCK_DESTINATION_DIR)
# Assertions
mock_requests_get.assert_called_once_with(MOCK_URL, headers={'accept': 'application/octet-stream'})
mock_open.assert_called_once_with(os.path.join(MOCK_DESTINATION_DIR, 'data_download.zip'), 'wb')
self.assertEqual(extracted_files, [])
self.assertEqual(zip_contents, mock_zip_instance.namelist())

def run_tests():
suite = unittest.TestSuite()
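These test rewrites stop asserting against a patched os.listdir and instead mock the archive itself, wiring namelist() through the context-manager protocol. A compact sketch of that mocking pattern outside the test class; list_zip_contents stands in for the extraction step of f_235:

import zipfile
from unittest.mock import MagicMock, patch


def list_zip_contents(path):
    # Stand-in for the function under test, which opens the downloaded zip.
    with zipfile.ZipFile(path, 'r') as zf:
        return zf.namelist()


with patch('zipfile.ZipFile') as mock_zipfile:
    mock_zip_instance = MagicMock()
    mock_zip_instance.namelist.return_value = ['file1.txt', 'file2.csv']
    # The code under test uses ZipFile as a context manager, so wire __enter__.
    mock_zipfile.return_value.__enter__.return_value = mock_zip_instance

    assert list_zip_contents('data.zip') == ['file1.txt', 'file2.csv']
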