Skip to content

Commit

Permalink
fix: repair data corrupted by a parsing error
Browse files — browse the repository at this point in the history
  • Loading branch information
terryyz committed May 4, 2024
1 parent f64400f commit 828dbab
Show file tree
Hide file tree
Showing 89 changed files with 844 additions and 841 deletions.
7 changes: 4 additions & 3 deletions data/clean/f_801_wenhao.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import re


def f_801(text, seed=0):
def f_801(text, seed=None):
"""
Scramble the letters in each word of a given text, keeping the first and last letters of each word intact.
Parameters:
text (str): The text to be scrambled.
seed (int, optional): A seed for the random number generator to ensure reproducible results.
Defaults to 0.
Defaults to None (not set).
Returns:
str: The scrambled text.
Expand All @@ -28,7 +28,8 @@ def f_801(text, seed=0):
>>> f_801("Programming is fun, isn't it?", 42)
"Prmiangmrog is fun, isn't it?"
"""
random.seed(seed)
if seed is not None:
random.seed(seed)

def scramble_word(match):
word = match.group(0)
Expand Down
1,430 changes: 715 additions & 715 deletions data/open-eval.jsonl

Large diffs are not rendered by default.

Binary file modified data/open-eval.jsonl.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion data/processed/f_1708_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def f_335(request, session_expire_time):
then sets this key in a cookie on an HttpResponse object with the specified expiration time.
Parameters:
request (django.http.HttpRequest): The inco Django HttpRequest.
request (django.http.HttpRequest): The incoming Django HttpRequest.
session_expire_time (int): The expiration time for the session cookie in seconds.
Returns:
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_1710_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def f_400(request, header, csv_data):
CSV file in response to a user request on a Django web application.
Parameters:
request (HttpRequest): The inco Django HttpRequest.
request (HttpRequest): The incoming Django HttpRequest.
header (list of str): List of strings representing the header of the CSV file.
csv_data (list of list of str): List of rows, with each row being a list of strings, to be written into the CSV file.
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_1711_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def f_236(request, file_paths):
is not utilized within the function but is required for compatibility with Django view structures.
Parameters:
- request (HttpRequest): The inco Django HttpRequest, not used within the function.
- request (HttpRequest): The incoming Django HttpRequest, not used within the function.
- file_paths (list of str): A list of file paths or file contents to be included in the zip.
Returns:
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_1712_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def f_475(template_folder):
Returns:
flask.app.Flask: A Flask application instance configured with a root route that handles POST requests.
The route logs inco request data as JSON and serves the 'index.html' template with the provided data.
The route logs incoming request data as JSON and serves the 'index.html' template with the provided data.
Requirements:
- flask.Flask
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_1715_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_logout_route_redirects_to_login(self):
with self.client as client:
# Simulate an authenticated session
with client.session_transaction() as sess:
sess['user_id'] = 'testuser' # Assu the user loader can use this to load the user
sess['user_id'] = 'testuser' # Assuming the user loader can use this to load the user
# Manually set current_user for the duration of the test
with patch('flask_login.utils._get_user') as mock_current_user:
mock_user = MagicMock()
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_1728_hanhu_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def test_pdf_overlay_accuracy(self):
mean, std_dev, num_samples = 0, 1, 1000
_, fig = f_334(mean, std_dev, num_samples)
ax = fig.axes[0]
line = ax.get_lines()[0] # Assu the first line is the PDF
line = ax.get_lines()[0] # Assuming the first line is the PDF
x, y = line.get_data()
expected_y = norm.pdf(x, mean, std_dev)
np.testing.assert_array_almost_equal(y, expected_y, decimal=2)
2 changes: 1 addition & 1 deletion data/processed/f_1764_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def f_668(ROOT_DIR, DEST_DIR, SPECIFIC_HASH):
- hashlib
Examples:
>>> # Assu the correct paths are given for ROOT_DIR, DEST_DIR,
>>> # Assuming the correct paths are given for ROOT_DIR, DEST_DIR,
>>> # and at least one file in ROOT_DIR matches SPECIFIC_HASH:
>>> type(f_668('/path/to/root', '/path/to/dest', 'd41d8cd98f00b204e9800998ecf8427e')) is int
True
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_1893_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_return_type(self, mock_get):
@patch('requests.get')
def test_handle_exceptions(self, mock_get):
"""Test that the function handles exceptions properly by not including IPs with failed requests."""
mock_get.side_effect = [requests.exceptions.ConnectionError] * 4 # Assu a /30 subnet, resulting in 4 attempts.
mock_get.side_effect = [requests.exceptions.ConnectionError] * 4 # Assuming a /30 subnet, resulting in 4 attempts.
result = f_341('192.168.0.0/30', 5)
# The expected result is adjusted since the function no longer returns False for failed requests but instead skips them.
expected_result = [] # Expecting an empty list due to ConnectionError.
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_203_wending_chien_minor_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,6 @@ def test_empty_keys(self):
f_605(self.data_dict, data_keys)
def test_key_not_in_dict(self):
# Test with a key that's not in the dictionary
data_keys = ['D'] # Assu 'D' is not in `data_dict`
data_keys = ['D'] # Assuming 'D' is not in `data_dict`
with self.assertRaises(ValueError):
f_605(self.data_dict, data_keys)
2 changes: 1 addition & 1 deletion data/processed/f_204_wending_chien_edit_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TestCases(unittest.TestCase):
def setUp(self):
random.seed(0)
# Correctly set up the mock within the test execution context
self.patcher = patch('random.randint', side_effect=[i % 100 for i in range(800)]) # Assu 8 students and 100 course entries
self.patcher = patch('random.randint', side_effect=[i % 100 for i in range(800)]) # Assuming 8 students and 100 course entries
self.mock_randint = self.patcher.start()
self.grades_df = f_445()
self.patcher.stop()
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_219_ratna_edit_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_non_empty_data(self):
updated_data = f_114(data, key, min_value, max_value)
self.assertIsInstance(updated_data, pd.DataFrame)
self.assertTrue(key in updated_data.columns)
self.assertEqual(len(updated_data), 3) # Assu the length of the input data is 3
self.assertEqual(len(updated_data), 3) # Assuming the length of the input data is 3
self.assertTrue(all(min_value <= val <= max_value for val in updated_data[key]))

def test_negative_values(self):
Expand Down
4 changes: 2 additions & 2 deletions data/processed/f_219_wending_chien_edit_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def f_426(df):
class TestCases(unittest.TestCase):
def test_case_1(self):
data_1 = pd.DataFrame({
'Title': ['How to code?', 'What is Python?', 'The art of program', 'How to cook?', 'What is life?'],
'Title': ['How to code?', 'What is Python?', 'The art of programming', 'How to cook?', 'What is life?'],
'Views': [1000, 500, 200, 300, 800],
'Likes': [500, 250, 100, 150, 600]
})
Expand Down Expand Up @@ -86,7 +86,7 @@ def test_case_3(self):
self.assertIsInstance(ax, matplotlib.axes.Axes, "The returned object should be of type Axes.")
def test_case_4(self):
data_4 = pd.DataFrame({
'Title': ['Learning to code', 'Python basics', 'Advanced program', 'Cooking basics',
'Title': ['Learning to code', 'Python basics', 'Advanced programming', 'Cooking basics',
'Life and philosophy'],
'Views': [1100, 450, 220, 320, 850],
'Likes': [550, 225, 110, 160, 425]
Expand Down
10 changes: 5 additions & 5 deletions data/processed/f_221_wending_chien_edit_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ class TestCases(unittest.TestCase):
def setUp(self):
# Sample data for testing
self.DATA = {
'Title': ['How to code?', 'What is Python?', 'The art of program', 'How to cook?', 'What is life?'],
'Content': ['This is a tutorial about coding...', 'Python is a program language...',
'Program is an art...', 'This is a cooking tutorial...', 'Life is complicated...']
'Title': ['How to code?', 'What is Python?', 'The art of programming', 'How to cook?', 'What is life?'],
'Content': ['This is a tutorial about coding...', 'Python is a programming language...',
'Programming is an art...', 'This is a cooking tutorial...', 'Life is complicated...']
}
self.df_sample = pd.DataFrame(self.DATA)
def test_case_1(self):
Expand All @@ -73,14 +73,14 @@ def test_case_1(self):
def test_case_2(self):
# Test with no interesting articles
df_no_interesting = self.df_sample.copy()
df_no_interesting['Title'] = ['Coding 101', 'Python tutorial', 'Program basics', 'Cooking basics',
df_no_interesting['Title'] = ['Coding 101', 'Python tutorial', 'Programming basics', 'Cooking basics',
'Life basics']
ax = f_693(df_no_interesting)
self.assertEqual(len(ax.patches), 0) # No bars in the plot as no interesting articles
def test_case_3(self):
# Test with only one interesting article
df_one_interesting = self.df_sample.copy()
df_one_interesting['Title'] = ['How to play guitar?', 'Python tutorial', 'Program basics', 'Cooking basics',
df_one_interesting['Title'] = ['How to play guitar?', 'Python tutorial', 'Programming basics', 'Cooking basics',
'Life basics']
ax = f_693(df_one_interesting)
self.assertEqual(len(ax.patches), 5) # 5 unique words in the interesting article
Expand Down
12 changes: 6 additions & 6 deletions data/processed/f_222_wending_chien_edit_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def f_183(df):
Example:
>>> import pandas as pd
>>> df_sample = pd.DataFrame({
... 'Title': ['How to code?', 'What is Python?', 'The art of program', 'How to cook?', 'What is life?'],
... 'Content': ['This is a tutorial about coding...', 'Python is a program language...',
... 'Program is an art...', 'This is a cooking tutorial...', 'Life is complicated...']
... 'Title': ['How to code?', 'What is Python?', 'The art of programming', 'How to cook?', 'What is life?'],
... 'Content': ['This is a tutorial about coding...', 'Python is a programming language...',
... 'Programming is an art...', 'This is a cooking tutorial...', 'Life is complicated...']
... })
>>> f_183(df_sample)
[0, 1, 0, 1]
Expand All @@ -48,9 +48,9 @@ class TestCases(unittest.TestCase):
def setUp(self):
"""Prepare environment and variables for tests."""
self.df_sample = pd.DataFrame({
'Title': ['How to code?', 'What is Python?', 'The art of program', 'How to cook?', 'What is life?'],
'Content': ['This is a tutorial about coding...', 'Python is a program language...',
'Program is an art...', 'This is a cooking tutorial...', 'Life is complicated...']
'Title': ['How to code?', 'What is Python?', 'The art of programming', 'How to cook?', 'What is life?'],
'Content': ['This is a tutorial about coding...', 'Python is a programming language...',
'Programming is an art...', 'This is a cooking tutorial...', 'Life is complicated...']
})
os.environ['OMP_NUM_THREADS'] = '1' # Setup environment variable for deterministic parallel processing
def tearDown(self):
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_2246_hanhu_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def f_565(dic):

import unittest
from unittest.mock import patch
import folium # Assu the function f_565 and folium are imported or defined appropriately.
import folium # Assuming the function f_565 and folium are imported or defined appropriately.
class TestCases(unittest.TestCase):
def test_return_type(self):
"""Test that the function returns a tuple with a map and a dictionary."""
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_2248_hanhu_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,5 @@ def test_map_initialization(self, mock_map):
"""Test that the map is initialized with correct latitude and longitude."""
locations = {'Loc1': {'Lat': 0, 'Lon': 0}, 'Loc2': {'Lat': 4, 'Lon': 4}}
f_555(locations)
# Assu that the map is initialized at the location of the first entry in the dictionary
# Assuming that the map is initialized at the location of the first entry in the dictionary
mock_map.assert_called_with(location=[0, 0], zoom_start=4)
2 changes: 1 addition & 1 deletion data/processed/f_240_haolan_ratna_edit_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def f_687(df, dct):
"""
This function calculates and returns the mean, median, mode, and variance for specified features in a DataFrame.
It replaces certain values in the DataFrame based on a provided dictionary mapping before perfor the calculations.
It replaces certain values in the DataFrame based on a provided dictionary mapping before performing the calculations.
Parameters:
df (DataFrame): The input DataFrame.
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_2656_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
def f_471():
"""
The f_471 method is a specialized handler for processing HTTP POST requests within a server setup,
primarily designed to handle JSON-formatted data. It meticulously checks inco requests to ensure they contain the
primarily designed to handle JSON-formatted data. It meticulously checks incoming requests to ensure they contain the
expected 'data' key and have a Content-Type header set to application/json.
If a request fails these checks, the method responds with an error status and a message indicating the specific validation failure.
Conversely, when a request satisfies these criteria, it acknowledges with a success message,
Expand Down
4 changes: 2 additions & 2 deletions data/processed/f_2657_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@

def f_542():
"""
Creates an HTTP POST request handler for processing inco data. The data is expected
Creates an HTTP POST request handler for processing incoming data. The data is expected
to be in JSON format with a key 'data'. The handler responds with a 200 success message
if the data is valid, or an error message otherwise.
The type of the response can be retrieved as 'content-type' and the length of the response as 'content-length'.
Returns:
function: A class that handles HTTP POST requests and validates inco data.
function: A class that handles HTTP POST requests and validates incoming data.
Requirements:
- cgi
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_2659_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def f_245(smtp_server, smtp_port, smtp_username, smtp_password):
"""
Creates an HTTP POST request handler that processes inco email data and sends
Creates an HTTP POST request handler that processes incoming email data and sends
an email. The email data must be a JSON object with 'subject', 'message', and 'to' keys.
The type of the response can be retrieved as 'content-type' and the length of the response as 'content-length'.
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_280_haolan_ratna_edit_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def f_540(directory):
Example:
>>> f_540("/path/to/directory")
(3, ['jquery-1.js', 'jquery-2.js', 'jquery-ui.js']) # Assu 3 jQuery files were removed
(3, ['jquery-1.js', 'jquery-2.js', 'jquery-ui.js']) # Assuming 3 jQuery files were removed
"""
logging.basicConfig(filename='jquery_removal.log', level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_287_haolan_ratna_edit_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def f_408(filename):
- shutil
Example:
>>> f_408('vmware-cmd.bat') # Assu successful execution
>>> f_408('vmware-cmd.bat') # Assuming successful execution
0
>>> f_408('nonexistent.bat') # If backup fails or file doesn't exist
-1
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_3031_hanhu_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,5 +108,5 @@ def test_plot_lines(self):
"""Test that the plot includes both real and imaginary parts of the complex wave."""
_, _, ax = f_160(self.amplitude, self.frequency, self.time)
lines = ax.get_lines()
# Assu the first line is the real part and the second line is the imaginary part
# Assuming the first line is the real part and the second line is the imaginary part
self.assertEqual(len(lines), 2, "Plot does not contain two lines for real and imaginary parts")
2 changes: 1 addition & 1 deletion data/processed/f_3047_hanhu_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def test_large_numbers(self):
def test_negative_numbers(self):
"""Test function with a negative number."""
with self.assertRaises(ValueError):
f_325([-1]) # Assu we want to enforce non-negative integers only
f_325([-1]) # Assuming we want to enforce non-negative integers only
def test_very_large_number(self):
"""Test function with a very large number to check for performance or overflow issues."""
number = 20 # A reasonable choice to avoid excessive computation time in tests
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_3320_hanhu_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
def f_205(X, Y):
"""
Trains a simple neural network on given input data and target labels. The function:
- Splits the data into a training set (75%) and a test set (25%), assu the input dimension is always 2.
- Splits the data into a training set (75%) and a test set (25%), assuming the input dimension is always 2.
- Constructs a Sequential model with one dense hidden layer and a sigmoid activation function.
- Compiles the model using binary cross-entropy loss and SGD optimizer with a specified learning rate.
- Fits the model to the training data (without verbose output), also evaluating it on the test set as validation data.
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_345_jenny_w_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def test_case_8(self):
result = f_694(P, T)
self.assertTrue(np.allclose(result, np.zeros((3, 15))))
def test_case_9(self):
# Test DataFrame output for correct column names, ensuring they match expected feature na convention
# Test DataFrame output for correct column names, ensuring they match expected feature naming convention
P = np.random.rand(3, 3)
T = np.random.rand(3, 4, 4)
result = f_694(P, T)
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_3587_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def f_443(src_dir, dest_dir, ext):
>>> moved_files = f_443(test_src_dir, test_dest_dir, test_ext)
>>> len(moved_files) > 0 # Check if any files were moved
True
>>> 'test_file.txt' in [os.path.basename(path) for path in moved_files] # Assu test_file.txt exists in test_src_dir
>>> 'test_file.txt' in [os.path.basename(path) for path in moved_files] # Assuming test_file.txt exists in test_src_dir
True
>>> os.listdir(test_dest_dir) # Verify that files were moved, and no duplicates exist in the destination
['test_file.txt']
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_3665_hanhu_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def default(self, obj):
import unittest
from datetime import datetime
from decimal import Decimal
import pytz # Assu pytz is used for timezone information in datetime objects
import pytz # Assuming pytz is used for timezone information in datetime objects
class TestCases(unittest.TestCase):
def test_datetime_serialization(self):
"""Ensure datetime objects are serialized to an ISO 8601 string."""
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_382_jenny_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def f_704(
equal-sized portions, the last timestamp may be excluded.
- columns (list of str, optional): Names of the DataFrame columns to be included in the output.
Defaults to: ['Timestamp', 'Sensor1', 'Sensor2', 'Sensor3', 'SensorStatus'].
Regardless of na, the function will populate the first column with
Regardless of naming, the function will populate the first column with
timestamp, the middle columns with sensor data, and the final with status.
- sensor_statuses (list of str, optional): Possible statuses for the sensors to randomly assign in the dataset.
Defaults to: ['OK', 'MAINTENANCE_REQUIRED', 'ERROR'].
Expand Down
2 changes: 1 addition & 1 deletion data/processed/f_385_jenny_wo_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_case_5(self):
"2023-01-03 Not a valid entry\n"
"WARNING - This log entry is missing its timestamp\n"
"2023-01-04 15:00:00.000000 - INFO - System update completed\n"
"Some random text not confor to the log format\n"
"Some random text not conforming to the log format\n"
"2023-01-04 16:00:00.000000 - ERROR - Error in processing\n"
)
log_file_path = self._create_temp_log_file("log5.txt", content)
Expand Down
Loading

0 comments on commit 828dbab

Please sign in to comment.