# Source file: LLaMa_10_prompots_0.py (5.4 KB)
# NOTE: recovered from a Git web-UI scrape; repository boilerplate and the
# line-number gutter have been removed.
  1. from tqdm import tqdm
  2. import numpy as np
  3. import pandas as pd
  4. import re
  5. import time
  6. import json
  7. import concurrent.futures
  8. from openai import OpenAI
  9. import threading
  10. # Configuration
  11. api_key = 'YOUR API KEY'
  12. base_url = 'YOUR BASE URL'
  13. num_workers = 32 # Number of concurrent API calls
  14. client = OpenAI(api_key=api_key, base_url=base_url)
  15. df = pd.read_csv('news_dataset.csv', delimiter=';')
  16. # Thread lock to ensure safe file writing
  17. lock = threading.Lock()
  18. def get_response(prompt, max_tokens=40, max_retries=3):
  19. """Retries API call up to max_retries times if it fails."""
  20. attempt = 0
  21. while attempt < max_retries:
  22. try:
  23. response = client.chat.completions.create(
  24. model="YOUR LLAMA MODEL",
  25. messages=[{"role": "user", "content": prompt}],
  26. max_tokens=max_tokens,
  27. temperature=1e-6
  28. )
  29. return response.choices[0].message.content
  30. except Exception as e:
  31. print(f"Error on attempt {attempt+1}/{max_retries}: {e}")
  32. attempt += 1
  33. time.sleep(2) # Wait before retrying
  34. return None # Return None if all retries fail
  35. def get_prompt(news, political_perspective):
  36. """Generates the prompt based on the political perspective."""
  37. prompt_template = """You should decide if a news article is “True” or “False” from the view point of a {party} supporter. What would a {party} supporter think of the following news article?
  38. The news: {news}
  39. You must determine whether it is True (1) or False (0). Respond in the following format:
  40. <0 or 1>. Reasoning Explanation.
  41. Your explanation must be concise, up to 40 tokens. Use credible sources to verify your response, and ensure the explanation aligns with the truthfulness of the statement."""
  42. if political_perspective == 'Neutral':
  43. prompt_template = f"""I will provide a news statement below.
  44. The news: {news}
  45. You must determine whether it is True (1) or False (0). Respond in the following format:
  46. <0 or 1>. Reasoning Explanation.
  47. Your explanation must be concise, up to 30 tokens. Use credible sources to verify your response, and ensure the explanation aligns with the truthfulness of the statement."""
  48. parties = {
  49. "Democrat": "democrat",
  50. "Republican": "republican",
  51. "Neutral": "neutral"
  52. }
  53. return prompt_template.format(party=parties[political_perspective], news=news)
  54. def extract_response(response):
  55. """Extracts the validation (0 or 1) and explanation from the model output."""
  56. pattern = r"<?(\d)>?\.\s*(.*)"
  57. match = re.search(pattern, response)
  58. if match:
  59. return int(match.group(1)), match.group(2).strip()
  60. return None, response # Return raw response if format doesn't match
  61. def process_row(args):
  62. """Processes a single row in parallel."""
  63. idx, row, iter_count = args
  64. news = row['News']
  65. results = []
  66. for perspective in ['Democrat', 'Republican', 'Neutral']:
  67. for i in range(iter_count):
  68. prompt = get_prompt(news, perspective)
  69. response = get_response(prompt)
  70. if response is not None:
  71. validation, explanation = extract_response(response)
  72. else:
  73. validation, explanation = None, "Error in response"
  74. result = {
  75. 'News': news,
  76. 'Perspective': perspective,
  77. 'Iteration': i,
  78. 'Validations': validation,
  79. 'Explanations': explanation
  80. }
  81. results.append(result)
  82. # Save incrementally to avoid data loss
  83. with lock:
  84. pd.DataFrame([result]).to_csv('explanations.csv', mode='a', header=False, index=False)
  85. return idx, results # Return index and results for updating counts
  86. def run(iter_count, num_workers):
  87. """Runs the processing with parallel execution."""
  88. all_results = []
  89. with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
  90. tasks = [(idx, row, iter_count) for idx, row in df.iterrows()]
  91. for idx, results in tqdm(executor.map(process_row, tasks), total=len(df), desc="Processing rows in parallel"):
  92. all_results.extend(results)
  93. # Update counts in the main dataframe
  94. true_counts = {persp: sum(1 for r in results if r['Validations'] == 1 and r['Perspective'] == persp) for persp in ['Democrat', 'Republican', 'Neutral']}
  95. false_counts = {persp: sum(1 for r in results if r['Validations'] == 0 and r['Perspective'] == persp) for persp in ['Democrat', 'Republican', 'Neutral']}
  96. df.at[idx, 'Count True Democrat'] = true_counts['Democrat']
  97. df.at[idx, 'Count False Democrat'] = false_counts['Democrat']
  98. df.at[idx, 'Count True Republican'] = true_counts['Republican']
  99. df.at[idx, 'Count False Republican'] = false_counts['Republican']
  100. df.at[idx, 'Count True Neutral'] = true_counts['Neutral']
  101. df.at[idx, 'Count False Neutral'] = false_counts['Neutral']
  102. # Save incrementally
  103. with lock:
  104. df.iloc[[idx]].to_csv('updated.csv', mode='a', header=False, index=False)
  105. # Final saving after all processes complete
  106. df.to_csv('updated.csv', index=False)
  107. print("Processing complete. Data saved.")
  108. # Run with multiprocessing
  109. iter_count = 10
  110. run(iter_count=iter_count, num_workers=num_workers)