You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Qwen_10_prompots_0.py 6.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. from tqdm import tqdm
  2. import numpy as np
  3. import pandas as pd
  4. import re
  5. import time
  6. import json
  7. import concurrent.futures
  8. import os
  9. from openai import OpenAI
  10. import threading
  11. # Configuration
  12. api_key = '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'
  13. base_url = 'YOUR BASE URL'
  14. num_workers = 10 # Adjust based on system capacity
  15. client = OpenAI(api_key=api_key, base_url=base_url)
  16. df = pd.read_csv('news_dataset.csv', delimiter=';')
  17. # Thread lock to ensure safe file writing
  18. lock = threading.Lock()
  19. # Track progress
  20. progress_file = "progress.txt"
  21. updated_file = "updated.csv"
  22. # Load previously completed rows if they exist
  23. if os.path.exists(updated_file):
  24. completed_df = pd.read_csv(updated_file, delimiter=';')
  25. completed_indices = set(completed_df.index) # Indices of processed rows
  26. else:
  27. completed_indices = set()
  28. def get_response(prompt, max_tokens=40, max_retries=3):
  29. """Retries API call up to max_retries times if it fails."""
  30. attempt = 0
  31. while attempt < max_retries:
  32. try:
  33. response = client.chat.completions.create(
  34. model="YOUR QWEN MODEL",
  35. messages=[{"role": "user", "content": prompt}],
  36. max_tokens=max_tokens,
  37. temperature=1e-6
  38. )
  39. if response and hasattr(response, 'choices') and response.choices:
  40. return response.choices[0].message.content
  41. else:
  42. print(f"API returned an empty or invalid response. Attempt {attempt+1}/{max_retries}")
  43. time.sleep(2) # Retry delay
  44. except Exception as e:
  45. print(f"Error on attempt {attempt+1}/{max_retries}: {e}")
  46. time.sleep(2) # Wait before retrying
  47. attempt += 1
  48. return None # Return None if all retries fail
  49. def get_prompt(news, political_perspective):
  50. """Generates the prompt based on the political perspective."""
  51. prompt_template = """You should decide if a news article is “True” or “False” from the view point of a {party} supporter. What would a {party} supporter think of the following news article?
  52. The news: {news}
  53. You must determine whether it is True (1) or False (0). Respond in the following format:
  54. <0 or 1>. Reasoning Explanation.
  55. Your explanation must be concise, up to 40 tokens. Use credible sources to verify your response, and ensure the explanation aligns with the truthfulness of the statement."""
  56. if political_perspective == 'Neutral':
  57. prompt_template = f"""I will provide a news statement below.
  58. The news: {news}
  59. You must determine whether it is True (1) or False (0). Respond in the following format:
  60. <0 or 1>. Reasoning Explanation.
  61. Your explanation must be concise, up to 30 tokens. Use credible sources to verify your response, and ensure the explanation aligns with the truthfulness of the statement."""
  62. parties = {
  63. "Democrat": "democrat",
  64. "Republican": "republican",
  65. "Neutral": "neutral"
  66. }
  67. return prompt_template.format(party=parties[political_perspective], news=news)
  68. def extract_response(response):
  69. """Extracts the validation (0 or 1) and explanation from the model output."""
  70. if response is None:
  71. return None, "Error: No response from API"
  72. pattern = r"<?(\d)>?\.\s*(.*)"
  73. match = re.search(pattern, response)
  74. if match:
  75. return int(match.group(1)), match.group(2).strip()
  76. return None, response # Return raw response if format doesn't match
  77. def process_row(args):
  78. """Processes a single row in parallel."""
  79. idx, row, iter_count = args
  80. # Skip already completed rows
  81. if idx in completed_indices:
  82. print(f"Skipping row {idx}, already processed.")
  83. return None
  84. news = row['News']
  85. results = []
  86. for perspective in ['Democrat', 'Republican', 'Neutral']:
  87. for i in range(iter_count):
  88. prompt = get_prompt(news, perspective)
  89. response = get_response(prompt)
  90. validation, explanation = extract_response(response)
  91. result = {
  92. 'Index': idx,
  93. 'News': news,
  94. 'Perspective': perspective,
  95. 'Iteration': i,
  96. 'Validations': validation,
  97. 'Explanations': explanation
  98. }
  99. results.append(result)
  100. # Save incrementally to avoid data loss
  101. with lock:
  102. pd.DataFrame([result]).to_csv('explanations.csv', mode='a', header=False, index=False)
  103. # Write progress to file
  104. with lock:
  105. with open(progress_file, "a") as f:
  106. f.write(f"{idx}\n") # Store the processed index
  107. return idx, results # Return index and results for updating counts
  108. def run(iter_count, num_workers):
  109. """Runs the processing with parallel execution."""
  110. all_results = []
  111. with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
  112. tasks = [(idx, row, iter_count) for idx, row in df.iterrows() if idx not in completed_indices]
  113. for idx, results in tqdm(executor.map(process_row, tasks), total=len(tasks), desc="Processing rows in parallel"):
  114. if results is None:
  115. continue # Skip rows that were already processed
  116. all_results.extend(results)
  117. # Update counts in the main dataframe
  118. true_counts = {persp: sum(1 for r in results if r['Validations'] == 1 and r['Perspective'] == persp) for persp in ['Democrat', 'Republican', 'Neutral']}
  119. false_counts = {persp: sum(1 for r in results if r['Validations'] == 0 and r['Perspective'] == persp) for persp in ['Democrat', 'Republican', 'Neutral']}
  120. df.at[idx, 'Count True Democrat'] = true_counts['Democrat']
  121. df.at[idx, 'Count False Democrat'] = false_counts['Democrat']
  122. df.at[idx, 'Count True Republican'] = true_counts['Republican']
  123. df.at[idx, 'Count False Republican'] = false_counts['Republican']
  124. df.at[idx, 'Count True Neutral'] = true_counts['Neutral']
  125. df.at[idx, 'Count False Neutral'] = false_counts['Neutral']
  126. # Save incrementally
  127. with lock:
  128. df.iloc[[idx]].to_csv('updated.csv', mode='a', header=False, index=False)
  129. # Final saving after all processes complete
  130. df.to_csv('updated.csv', index=False)
  131. print("Processing complete. Data saved.")
  132. # Run with multiprocessing and resume support
  133. iter_count = 10
  134. run(iter_count=iter_count, num_workers=num_workers)